blob: 012182e090b9f1b2ab3bef92a654a6bfd7ec0837 [file] [log] [blame]
#![feature(portable_simd)]
use core_simd::simd::*;
fn a(i: usize, j: usize) -> f64 {
((i + j) * (i + j + 1) / 2 + i + 1) as f64
}
fn mult_av(v: &[f64], out: &mut [f64]) {
assert!(v.len() == out.len());
assert!(v.len() % 2 == 0);
for (i, out) in out.iter_mut().enumerate() {
let mut sum = f64x2::splat(0.0);
let mut j = 0;
while j < v.len() {
let b = f64x2::from_slice(&v[j..]);
let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
sum += b / a;
j += 2
}
*out = sum.reduce_sum();
}
}
fn mult_atv(v: &[f64], out: &mut [f64]) {
assert!(v.len() == out.len());
assert!(v.len() % 2 == 0);
for (i, out) in out.iter_mut().enumerate() {
let mut sum = f64x2::splat(0.0);
let mut j = 0;
while j < v.len() {
let b = f64x2::from_slice(&v[j..]);
let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
sum += b / a;
j += 2
}
*out = sum.reduce_sum();
}
}
fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
mult_av(v, tmp);
mult_atv(tmp, out);
}
pub fn spectral_norm(n: usize) -> f64 {
assert!(n % 2 == 0, "only even lengths are accepted");
let mut u = vec![1.0; n];
let mut v = u.clone();
let mut tmp = u.clone();
for _ in 0..10 {
mult_atav(&u, &mut v, &mut tmp);
mult_atav(&v, &mut u, &mut tmp);
}
(dot(&u, &v) / dot(&v, &v)).sqrt()
}
fn dot(x: &[f64], y: &[f64]) -> f64 {
// This is auto-vectorized:
x.iter().zip(y).map(|(&x, &y)| x * y).sum()
}
#[cfg(test)]
#[test]
fn test() {
assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
}
fn main() {
// Empty main to make cargo happy
}