Browse Source

Modify SIMD chunk size from 4 to 16 lanes

main
Yasuhiro Yamaguchi 2 years ago
parent
commit
b4156498f2
Signed by: altescy GPG Key ID: 45C9FA6B031084CC
  1. 38
      src/simd.rs

38
src/simd.rs

@@ -1,11 +1,11 @@
use std::simd::Simd;
pub fn sum_f32_vector_simd(a: &[f32]) -> f32 {
let chunk_size = 4;
let chunk_size = 16;
let chunks = a.len() / chunk_size;
let mut sum = Simd::<f32, 4>::splat(0.0);
let mut sum = Simd::<f32, 16>::splat(0.0);
for i in 0..chunks {
let a = Simd::<f32, 4>::from_slice(&a[i * chunk_size..]);
let a = Simd::<f32, 16>::from_slice(&a[i * chunk_size..]);
sum += a;
}
let mut result = sum.as_array().iter().sum::<f32>();
@@ -21,11 +21,11 @@ pub fn add_f32_vector_simd(a: &[f32], b: &[f32]) -> Vec<f32> {
if a.is_empty() {
return result;
}
let chunk_size = 4;
let chunk_size = 16;
let chunks = a.len() / chunk_size;
for i in 0..chunks {
let a = Simd::<f32, 4>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 4>::from_slice(&b[i * chunk_size..]);
let a = Simd::<f32, 16>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 16>::from_slice(&b[i * chunk_size..]);
let c = a + b;
result.extend_from_slice(c.as_array());
}
@@ -41,11 +41,11 @@ pub fn sub_f32_vector_simd(a: &[f32], b: &[f32]) -> Vec<f32> {
if a.is_empty() {
return result;
}
let chunk_size = 4;
let chunk_size = 16;
let chunks = a.len() / chunk_size;
for i in 0..chunks {
let a = Simd::<f32, 4>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 4>::from_slice(&b[i * chunk_size..]);
let a = Simd::<f32, 16>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 16>::from_slice(&b[i * chunk_size..]);
let c = a - b;
result.extend_from_slice(c.as_array());
}
@@ -61,11 +61,11 @@ pub fn mul_f32_tensor_simd(a: &[f32], b: &[f32]) -> Vec<f32> {
if a.is_empty() {
return result;
}
let chunk_size = 4;
let chunk_size = 16;
let chunks = a.len() / chunk_size;
for i in 0..chunks {
let a = Simd::<f32, 4>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 4>::from_slice(&b[i * chunk_size..]);
let a = Simd::<f32, 16>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 16>::from_slice(&b[i * chunk_size..]);
let c = a * b;
result.extend_from_slice(c.as_array());
}
@@ -81,11 +81,11 @@ pub fn div_f32_tensor_simd(a: &[f32], b: &[f32]) -> Vec<f32> {
if a.is_empty() {
return result;
}
let chunk_size = 4;
let chunk_size = 16;
let chunks = a.len() / chunk_size;
for i in 0..chunks {
let a = Simd::<f32, 4>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 4>::from_slice(&b[i * chunk_size..]);
let a = Simd::<f32, 16>::from_slice(&a[i * chunk_size..]);
let b = Simd::<f32, 16>::from_slice(&b[i * chunk_size..]);
let c = a / b;
result.extend_from_slice(c.as_array());
}
@@ -110,7 +110,7 @@ pub fn matmul_f32_tensor_simd(
let n = b_shape[1];
let k = a_shape[1];
let chunk_size = 4;
let chunk_size = 16;
let chunks = k / chunk_size;
let mut result = vec![0.0; m * n];
@@ -121,10 +121,10 @@ pub fn matmul_f32_tensor_simd(
}
for i in 0..m {
let arow = &a[i * k..(i + 1) * k];
let mut sum = Simd::<f32, 4>::splat(0.0);
let mut sum = Simd::<f32, 16>::splat(0.0);
for chunk in 0..chunks {
let a = Simd::<f32, 4>::from_slice(&arow[chunk * chunk_size..]);
let b = Simd::<f32, 4>::from_slice(&bcol[chunk * chunk_size..]);
let a = Simd::<f32, 16>::from_slice(&arow[chunk * chunk_size..]);
let b = Simd::<f32, 16>::from_slice(&bcol[chunk * chunk_size..]);
sum += a * b;
}
let mut res = sum.as_array().iter().sum::<f32>();

Loading…
Cancel
Save