/*
 * Copyright (C) 2018 Synaptics Incorporated. All rights reserved.
 * Copyright Marvell Semiconductor, Inc. 2006. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * INFORMATION CONTAINED IN THIS DOCUMENT IS PROVIDED "AS-IS," AND
 * SYNAPTICS EXPRESSLY DISCLAIMS ALL EXPRESS AND IMPLIED WARRANTIES,
 * INCLUDING ANY IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE, AND ANY WARRANTIES OF NON-INFRINGEMENT OF ANY
 * INTELLECTUAL PROPERTY RIGHTS. IN NO EVENT SHALL SYNAPTICS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, OR
 * CONSEQUENTIAL DAMAGES ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OF THE INFORMATION CONTAINED IN THIS DOCUMENT, HOWEVER CAUSED AND
 * BASED ON ANY THEORY OF LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, AND EVEN IF SYNAPTICS WAS
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. IF A TRIBUNAL OF
 * COMPETENT JURISDICTION DOES NOT PERMIT THE DISCLAIMER OF DIRECT
 * DAMAGES OR ANY OTHER DAMAGES, SYNAPTICS' TOTAL CUMULATIVE LIABILITY
 * TO ANY PARTY SHALL NOT EXCEED ONE HUNDRED U.S. DOLLARS.
 */

//
// This code initialises the Integrator board (eg REMAP) before calling
// TCM Initialization and MMU Initialization if they exist.
// this allows scatter loading to relocate code into the TCMs
//
// This code must be run from EL2

#include "mmu_64.h"

/* Currently we only map 0-4G. In this case, the page table should look like this:
 * 0x00000000 ->  +--------------------------+
 *                |    L1.0 => [0GB, 1GB)    | -+
 *                |    L1.1 => [1GB, 2GB)    | -+-+
 *                |    L1.2 => [2GB, 3GB)    | -+-+-+
 *                |    L1.3 => [3GB, 4GB)    | -+-+-+-+
 *                |           ...            |  | | | |
 * 0x00001000 ->  +--------------------------+<-' | | |
 *                |         [0GB, 1GB)       |    | | |
 *                |   L2.0.0 => [0MB, 2MB)   |    | | |
 *                |   L2.0.1 => [2MB, 4MB)   |    | | |
 *                |           ...            |    | | |
 *                | L2.0.511 => [1022M, 1GB) |    | | |
 * 0x00002000 ->  +--------------------------+<---' | |
 *                |         [1GB, 2GB)       |      | |
 *                |   L2.1.0 => [0MB, 2MB)   |      | |
 *                |   L2.1.1 => [2MB, 4MB)   |      | |
 *                |           ...            |      | |
 *                | L2.1.511 => [1022M, 1GB) |      | |
 * 0x00003000 ->  +--------------------------+<-----' |
 *                |         [2GB, 3GB)       |        |
 *                |   L2.2.0 => [0MB, 2MB)   |        |
 *                |   L2.2.1 => [2MB, 4MB)   |        |
 *                |           ...            |        |
 *                | L2.2.511 => [1022M, 1GB) |        |
 * 0x00004000 ->  +--------------------------+<-------'
 *                |         [3GB, 4GB)       |
 *                |   L2.3.0 => [0MB, 2MB)   |
 *                |   L2.3.1 => [2MB, 4MB)   |
 *                |           ...            |
 *                | L2.3.511 => [1022M, 1GB) |
 * 0x00005000 ->  +--------------------------+
 */

// build_level1_table start_addr, end_addr
//
// Fills the level-1 table so that every 1GB slot in [start_addr, end_addr)
// holds a table descriptor pointing at the matching level-2 page.
//
// NOTE: start_addr/end_addr must be within 0G~4G. They are divided by
//       LEVEL1_BLOCK_SIZE (truncating), so they should be multiples of it.
//       Exactly (end_addr - start_addr) / LEVEL1_BLOCK_SIZE entries are
//       written; an empty range writes nothing.
// Clobbers: x0-x4, condition flags.
.macro  build_level1_table, start_addr, end_addr
	// x0: base address of translation table (loaded from the ttb literal)
	ldr x0, ttb

	// x1: address of the first l1 entry to fill
	mov x1, #((\start_addr) / LEVEL1_BLOCK_SIZE * ENTRY_SIZE + LEVEL1_TABLE_OFFSET)
	add x1, x1, x0

	// x2: address one past the last l1 entry to fill
	mov x2, #((\end_addr) / LEVEL1_BLOCK_SIZE * ENTRY_SIZE + LEVEL1_TABLE_OFFSET)
	add x2, x2, x0

	// x3: address of the l2 page that backs the first l1 entry
	mov x3, #((\start_addr) / LEVEL1_BLOCK_SIZE * PAGE_SIZE + LEVEL2_TABLE_OFFSET)
	add x3, x3, x0

	// x4: l1 table descriptor = l2 page address | PDM_ATTRS
	mov x4, #PDM_ATTRS
	orr x4, x4, x3

	// Test before the first store so an empty range is left untouched.
	b 2f
1:
	str x4, [x1], #ENTRY_SIZE	// write one descriptor, advance cursor
	add x4, x4, #PAGE_SIZE		// next l1 entry -> next l2 page
2:
	cmp x1, x2
	b.lo 1b				// unsigned: x1/x2 are addresses
.endm

// build_level2_block start_addr, end_addr, attr_index
//
// Fills the level-2 tables with 2MB block descriptors that identity-map
// [start_addr, end_addr) using the memory attributes selected by attr_index.
//
// NOTE: start_addr/end_addr must be within 0G~4G. start_addr is ORed
//       directly into the descriptor and both bounds are divided by
//       LEVEL2_BLOCK_SIZE (truncating), so they should be multiples of
//       LEVEL2_BLOCK_SIZE.
//       Exactly (end_addr - start_addr) / LEVEL2_BLOCK_SIZE entries are
//       written; an empty range writes nothing.
// attr_index should be one of MT_DEVICE_NGNRNE, MT_DEVICE_NGNRE, MT_NORMAL_NC, MT_NORMAL_C
// Clobbers: x0-x3, condition flags.
.macro  build_level2_block, start_addr, end_addr, attr_index
	// x0: base address of translation table (loaded from the ttb literal)
	ldr x0, ttb

	// x1: address of the first l2 entry to fill
	mov x1, #((\start_addr) / LEVEL2_BLOCK_SIZE * ENTRY_SIZE + LEVEL2_TABLE_OFFSET)
	add x1, x1, x0

	// x2: address one past the last l2 entry to fill
	mov x2, #((\end_addr) / LEVEL2_BLOCK_SIZE * ENTRY_SIZE + LEVEL2_TABLE_OFFSET)
	add x2, x2, x0

	// x3: l2 block descriptor = output address | attributes
	ldr x3, =((\start_addr) | PAGE_ATTRS(\attr_index))

	// Test before the first store so an empty range is left untouched.
	b 2f
1:
	str x3, [x1], #ENTRY_SIZE	// write one descriptor, advance cursor
	add x3, x3, #LEVEL2_BLOCK_SIZE	// next entry maps the next block
2:
	cmp x1, x2
	b.lo 1b				// unsigned: x1/x2 are addresses
.endm

//-----------------------------------------------------------------------
// void enable_mmu(void)
//
// Builds the flat translation table and turns on the EL2 MMU and caches.
// Must be called at EL2 (only *_el2 system registers are touched).
//
// In:    none
// Out:   none
// Clobb: x0, x1, plus whatever flat_map_blocks, invalidate_dcache_all and
//        invalidate_tlb_all clobber (the latter two are defined elsewhere).
// Stack: 16 bytes; SP stays 16-byte aligned across every call.
//
// NOTE(review): the frame is stored before the data cache is turned off.
// If the cache is enabled on entry and invalidate_dcache_all invalidates
// without cleaning, the saved x29/x30 could be discarded - confirm that
// callers enter with the data cache disabled.
//-----------------------------------------------------------------------
.global enable_mmu
enable_mmu:

	// Save FP/LR as a pair so SP remains 16-byte aligned (AAPCS64).
	stp x29, x30, [sp, #-16]!

	// Clear SCTLR_M and SCTLR_C (masks from mmu_64.h) before touching
	// the caches and tables.
	mrs x0, sctlr_el2
	ldr x1, =~(SCTLR_M|SCTLR_C)
	and x0, x0, x1
	msr sctlr_el2, x0
	isb				// make the SCTLR change take effect first

	bl invalidate_dcache_all
	bl invalidate_tlb_all

	dsb sy
	isb

	bl flat_map_blocks		 // create translation table

	dsb sy				// table writes complete before use

	ldr x0, =MAIR_VALUE
	msr mair_el2, x0

	ldr x0, =TCR_VALUE
	msr tcr_el2, x0

	ldr x0, ttb
	msr ttbr0_el2, x0

	// MAIR/TCR/TTBR0 must be in effect before translation is switched on.
	isb

	mrs x0, sctlr_el2
	ldr x1, =(SCTLR_M|SCTLR_I|SCTLR_C)
	orr x0, x0, x1
	msr sctlr_el2, x0

	isb

	ldp x29, x30, [sp], #16
	ret

// flat_map_blocks
//
// Populates the translation table located via the ttb literal: first the
// level-1 entries for [0, TOTAL_VA_SIZE), then level-2 block entries for
// the cacheable, non-cacheable and device ranges defined in mmu_64.h.
//
// Leaf routine: the macros expand to straight-line code and loops with no
// calls, so x30 is left intact and no stack is used.
// Clobbers the scratch registers used by the macros (within x0-x5) and the
// condition flags.
//
// The order of the build_level2_block invocations matters: each one stores
// into the same level-2 tables, so a later range overwrites any entries it
// shares with an earlier one.
flat_map_blocks:
	// Level 1: one table descriptor per 1GB slot.
	build_level1_table 0, TOTAL_VA_SIZE

	// Level 2: normal memory, cacheable then non-cacheable.
	build_level2_block C_MEMORY_ADDR_START, C_MEMORY_ADDR_END, MT_NORMAL_C
	build_level2_block NC_MEMORY_ADDR_START, NC_MEMORY_ADDR_END, MT_NORMAL_NC

	// Level 2: device (nGnRnE) region.
	build_level2_block IO_ADDR_START, IO_ADDR_END, MT_DEVICE_NGNRNE

	ret


//-----------------------------------------------------------------------
// void disable_mmu(void)
//
// Clears SCTLR_M, SCTLR_C and SCTLR_I in SCTLR_EL2, then calls
// flush_dcache_all and invalidate_tlb_all (both defined elsewhere).
// Must be called at EL2.
//
// In:    none
// Out:   none
// Clobb: x0, x1, plus whatever the two called routines clobber.
// Stack: 16 bytes; SP stays 16-byte aligned across every call.
//-----------------------------------------------------------------------
.global disable_mmu
disable_mmu:
	// Save FP/LR as a pair so SP remains 16-byte aligned (AAPCS64).
	stp x29, x30, [sp, #-16]!

	mrs x0, sctlr_el2
	ldr x1, =~(SCTLR_M|SCTLR_C|SCTLR_I)
	and x0, x0, x1
	msr sctlr_el2, x0
	isb				// SCTLR change in effect before maintenance

	// The frame saved above is only reloaded after the flush has run.
	bl flush_dcache_all
	bl invalidate_tlb_all

	ldp x29, x30, [sp], #16
	ret

// Address of the translation table base (symbol defined outside this file).
// Every user reads it with `ldr x0, ttb`, which is an 8-byte literal load,
// so the slot must be a full, 8-byte-aligned doubleword. A 4-byte .word
// would leave the upper half of x0 to whatever bytes follow it.
	.balign 8
ttb:		.quad	__ttb_base_start
