From b4156498f26b3cf69b37b5242065f4bf27f131a2 Mon Sep 17 00:00:00 2001 From: altescy Date: Mon, 22 Jan 2024 15:23:07 +0900 Subject: [PATCH] modify chunk size --- src/simd.rs | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/simd.rs b/src/simd.rs index 302204c..5baf20f 100644 --- a/src/simd.rs +++ b/src/simd.rs @@ -1,11 +1,11 @@ use std::simd::Simd; pub fn sum_f32_vector_simd(a: &[f32]) -> f32 { - let chunk_size = 4; + let chunk_size = 16; let chunks = a.len() / chunk_size; - let mut sum = Simd::::splat(0.0); + let mut sum = Simd::::splat(0.0); for i in 0..chunks { - let a = Simd::::from_slice(&a[i * chunk_size..]); + let a = Simd::::from_slice(&a[i * chunk_size..]); sum += a; } let mut result = sum.as_array().iter().sum::(); @@ -21,11 +21,11 @@ pub fn add_f32_vector_simd(a: &[f32], b: &[f32]) -> Vec { if a.is_empty() { return result; } - let chunk_size = 4; + let chunk_size = 16; let chunks = a.len() / chunk_size; for i in 0..chunks { - let a = Simd::::from_slice(&a[i * chunk_size..]); - let b = Simd::::from_slice(&b[i * chunk_size..]); + let a = Simd::::from_slice(&a[i * chunk_size..]); + let b = Simd::::from_slice(&b[i * chunk_size..]); let c = a + b; result.extend_from_slice(c.as_array()); } @@ -41,11 +41,11 @@ pub fn sub_f32_vector_simd(a: &[f32], b: &[f32]) -> Vec { if a.is_empty() { return result; } - let chunk_size = 4; + let chunk_size = 16; let chunks = a.len() / chunk_size; for i in 0..chunks { - let a = Simd::::from_slice(&a[i * chunk_size..]); - let b = Simd::::from_slice(&b[i * chunk_size..]); + let a = Simd::::from_slice(&a[i * chunk_size..]); + let b = Simd::::from_slice(&b[i * chunk_size..]); let c = a - b; result.extend_from_slice(c.as_array()); } @@ -61,11 +61,11 @@ pub fn mul_f32_tensor_simd(a: &[f32], b: &[f32]) -> Vec { if a.is_empty() { return result; } - let chunk_size = 4; + let chunk_size = 16; let chunks = a.len() / chunk_size; for i in 0..chunks { - let a = Simd::::from_slice(&a[i * chunk_size..]); - let b = Simd::::from_slice(&b[i * chunk_size..]); + let a = Simd::::from_slice(&a[i * chunk_size..]); + let b = Simd::::from_slice(&b[i * chunk_size..]); let c = a * b; result.extend_from_slice(c.as_array()); } @@ -81,11 +81,11 @@ pub fn div_f32_tensor_simd(a: &[f32], b: &[f32]) -> Vec { if a.is_empty() { return result; } - let chunk_size = 4; + let chunk_size = 16; let chunks = a.len() / chunk_size; for i in 0..chunks { - let a = Simd::::from_slice(&a[i * chunk_size..]); - let b = Simd::::from_slice(&b[i * chunk_size..]); + let a = Simd::::from_slice(&a[i * chunk_size..]); + let b = Simd::::from_slice(&b[i * chunk_size..]); let c = a / b; result.extend_from_slice(c.as_array()); } @@ -110,7 +110,7 @@ pub fn matmul_f32_tensor_simd( let n = b_shape[1]; let k = a_shape[1]; - let chunk_size = 4; + let chunk_size = 16; let chunks = k / chunk_size; let mut result = vec![0.0; m * n]; @@ -121,10 +121,10 @@ pub fn matmul_f32_tensor_simd( } for i in 0..m { let arow = &a[i * k..(i + 1) * k]; - let mut sum = Simd::::splat(0.0); + let mut sum = Simd::::splat(0.0); for chunk in 0..chunks { - let a = Simd::::from_slice(&arow[chunk * chunk_size..]); - let b = Simd::::from_slice(&bcol[chunk * chunk_size..]); + let a = Simd::::from_slice(&arow[chunk * chunk_size..]); + let b = Simd::::from_slice(&bcol[chunk * chunk_size..]); sum += a * b; } let mut res = sum.as_array().iter().sum::();