import { Matrix } from 'ml-matrix';
import { similarity } from 'ml-distance';

/**
 * Detects the text encoding of a file from its byte-order mark (BOM).
 *
 * Only the first two bytes of the file are read. The resolved value is always
 * a valid Encoding Standard label, so it can be passed straight to
 * `FileReader.readAsText` or `TextDecoder`. Anything without a UTF-16 BOM is
 * reported as UTF-8; a UTF-8 BOM needs no dedicated label because UTF-8
 * decoders strip it by default.
 *
 * @param {Blob} file - File (or Blob) to inspect.
 * @returns {Promise<string>} Resolves to 'utf-8', 'utf-16le' or 'utf-16be'.
 *   Rejects with the FileReader error event when the read fails.
 */
export const detectEncoding = async (file) => {
  const reader = new FileReader();
  return new Promise((resolve, reject) => {
    reader.onload = () => {
      const [first, second] = new Uint8Array(reader.result);
      if (first === 0xff && second === 0xfe) {
        resolve('utf-16le');
      } else if (first === 0xfe && second === 0xff) {
        resolve('utf-16be');
      } else {
        resolve('utf-8');
      }
    };
    reader.onerror = reject;
    // The BOM lives in the leading bytes, so there is no need to load the whole file.
    reader.readAsArrayBuffer(file.slice(0, 2));
  });
};

/**
 * Reads a whitespace-delimited text file into a table of string cells.
 *
 * The file is decoded with the encoding reported by `detectEncoding`, split
 * into lines on LF or CRLF, and each line is trimmed and split on runs of
 * whitespace. Lines that are empty after trimming (for example the one
 * produced by a trailing newline) are dropped so they do not show up as
 * phantom `['']` rows.
 *
 * @param {Blob} file - File (or Blob) to read.
 * @returns {Promise<string[][]>} Resolves to one array of cell strings per
 *   non-blank line. Rejects with the FileReader error event when the read fails.
 */
export const readData = async (file) => {
  const encoding = await detectEncoding(file);
  const reader = new FileReader();
  return new Promise((resolve, reject) => {
    reader.onload = () => {
      const table = reader.result
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line !== '')
        .map((line) => line.split(/\s+/));
      resolve(table);
    };
    reader.onerror = reject;
    reader.readAsText(file, encoding);
  });
};

/**
 * Extracts numeric [time, area] pairs from a parsed table and separates
 * outliers from the rest.
 *
 * The first row is treated as a header and skipped. For every remaining row
 * the cells at `timeColumn` and `areaColumn` are parsed with `parseFloat`;
 * rows where either value is not a finite number are discarded. A pair is an
 * outlier when its area differs from the mean area by more than
 * `maxDeviations` population standard deviations.
 *
 * The input array is not modified.
 *
 * @param {Array<Array<string>>} data - Parsed table, header row first.
 * @param {Object} [options]
 * @param {number} [options.timeColumn=1] - Zero-based index of the first extracted column.
 * @param {number} [options.areaColumn=6] - Zero-based index of the column used for outlier detection.
 * @param {number} [options.maxDeviations=2] - Allowed distance from the mean, in standard deviations.
 * @returns {{filteredData: number[][], totalRows: number, removedData: number[][], removedRows: number}}
 *   Kept pairs, their count, removed outlier pairs, and their count.
 */
export const extractPeakAreaPercentage = (
  data,
  { timeColumn = 1, areaColumn = 6, maxDeviations = 2 } = {},
) => {
  const pairs = data
    .slice(1) // skip the header row
    .map((row) => [Number.parseFloat(row[timeColumn]), Number.parseFloat(row[areaColumn])])
    .filter(([time, area]) => Number.isFinite(time) && Number.isFinite(area));

  const count = pairs.length;
  const mean = pairs.reduce((sum, [, area]) => sum + area, 0) / count;
  const variance = pairs.reduce((sum, [, area]) => sum + (area - mean) ** 2, 0) / count;
  const limit = maxDeviations * Math.sqrt(variance);

  // Single pass: every pair lands in exactly one of the two buckets.
  const filteredData = [];
  const removedData = [];
  for (const pair of pairs) {
    if (Math.abs(pair[1] - mean) <= limit) {
      filteredData.push(pair);
    } else {
      removedData.push(pair);
    }
  }

  return {
    filteredData,
    totalRows: filteredData.length,
    removedData,
    removedRows: removedData.length,
  };
};

/**
 * Builds a short summary of a data file: its name plus the row counts and
 * removed outliers reported by `extractPeakAreaPercentage`.
 *
 * @param {File} file - File to read; its `name` property is echoed back.
 * @returns {Promise<{fileName: string, totalRows: number, removedRows: number, removedData: Array}>}
 */
export const getFileDetails = async (file) => {
  const table = await readData(file);
  const { totalRows, removedRows, removedData } = extractPeakAreaPercentage(table);
  return { fileName: file.name, totalRows, removedRows, removedData };
};

/**
 * Aligns two lists of [time, area] pairs into two equal-length area vectors.
 *
 * Both lists are walked in step, merge-style. A pair from `data1` and a pair
 * from `data2` whose times differ by at most `tolerance` share one position.
 * A pair with no partner in the other list gets its own position, with 0 on
 * the other side. Every input pair is emitted exactly once, so aligning a
 * list with itself yields two identical vectors.
 *
 * NOTE(review): the walk only makes sense if both lists are ordered by
 * ascending time; that is not checked here — confirm against callers.
 *
 * @param {Array<[number, number]>} data1 - First list of [time, area] pairs.
 * @param {Array<[number, number]>} data2 - Second list of [time, area] pairs.
 * @param {number} [tolerance=0.05] - Maximum time difference for a match.
 * @returns {[number[], number[]]} Aligned area vectors for data1 and data2.
 */
export const alignData = (data1, data2, tolerance = 0.05) => {
  const alignedData1 = [];
  const alignedData2 = [];
  const len2 = data2.length;
  let index2 = 0;

  for (const [time1, area1] of data1) {
    // data2 pairs that are too early to match this time have no partner in
    // data1; keep them as unmatched instead of dropping them.
    while (index2 < len2 && data2[index2][0] < time1 - tolerance) {
      alignedData1.push(0);
      alignedData2.push(data2[index2][1]);
      index2++;
    }

    alignedData1.push(area1);
    if (index2 < len2 && Math.abs(data2[index2][0] - time1) <= tolerance) {
      alignedData2.push(data2[index2][1]);
      index2++; // consume the match so it cannot be paired or emitted again
    } else {
      alignedData2.push(0);
    }
  }

  // Whatever is left in data2 comes after the last data1 pair and is unmatched.
  for (; index2 < len2; index2++) {
    alignedData1.push(0);
    alignedData2.push(data2[index2][1]);
  }

  return [alignedData1, alignedData2];
};

/**
 * Computes the cosine similarity of two numeric vectors as a percentage.
 *
 * When the vectors differ in length, the shorter one is treated as if it were
 * padded with zeros, so the result does not depend on argument order.
 *
 * @param {number[]} data1 - First vector.
 * @param {number[]} data2 - Second vector.
 * @returns {number} Cosine similarity multiplied by 100. Returns 0 when
 *   either vector is empty or has zero magnitude.
 */
export const calculateCosineSimilarity = (data1, data2) => {
  if (data1.length === 0 || data2.length === 0) {
    return 0.0; // nothing to compare
  }

  const length = Math.max(data1.length, data2.length);
  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let i = 0; i < length; i++) {
    const a = i < data1.length ? data1[i] : 0;
    const b = i < data2.length ? data2[i] : 0;
    dotProduct += a * b;
    squaredNormA += a * a;
    squaredNormB += b * b;
  }

  const normA = Math.sqrt(squaredNormA);
  const normB = Math.sqrt(squaredNormB);
  if (normA === 0 || normB === 0) {
    return 0.0; // avoid dividing by zero for all-zero vectors
  }

  return (dotProduct / (normA * normB)) * 100; // convert to a percentage
};

/**
 * Smooths a series with a centred moving average.
 *
 * Each output value is the mean of the input values from `windowSize`
 * positions before the current index to `windowSize` positions after it; the
 * range is clipped at both ends of the array, so edge values are averaged
 * over fewer samples.
 *
 * @param {Array} data - Original series.
 * @param {number} windowSize - Number of neighbours taken on each side.
 * @returns {Array} Smoothed series with the same length as `data`.
 */
const smoothData = (data, windowSize) => {
  const total = data.length;
  return Array.from({ length: total }, (_, center) => {
    const neighbourhood = data.slice(
      Math.max(0, center - windowSize),
      Math.min(total, center + windowSize + 1),
    );
    let sum = 0;
    for (const value of neighbourhood) {
      sum += value;
    }
    return sum / neighbourhood.length;
  });
};

/**
 * Compares two data files and returns their similarity together with
 * smoothed, aligned series.
 *
 * Both files are read concurrently, cleaned with
 * `extractPeakAreaPercentage`, and aligned with `alignData`. The cosine
 * similarity is computed on the aligned, unsmoothed values; the series that
 * are returned are smoothed afterwards.
 *
 * @param {Blob} file1 - First file.
 * @param {Blob} file2 - Second file.
 * @returns {Promise<{cosineSimilarity: number, alignedData1: number[], alignedData2: number[]}>}
 *   `alignedData1` and `alignedData2` hold the smoothed aligned series.
 */
export const compareFiles = async (file1, file2) => {
  // Number of neighbours on each side passed to smoothData.
  const smoothingWindow = 5;

  // The two reads are independent, so run them in parallel.
  const [data1, data2] = await Promise.all([readData(file1), readData(file2)]);

  const cleanedData1 = extractPeakAreaPercentage(data1).filteredData;
  const cleanedData2 = extractPeakAreaPercentage(data2).filteredData;

  const [alignedData1, alignedData2] = alignData(cleanedData1, cleanedData2);

  // For accuracy, the similarity uses the aligned data before any smoothing.
  const cosineSimilarity = calculateCosineSimilarity(alignedData1, alignedData2);

  // Smoothing is applied only to the returned series, to reduce noise when plotted.
  return {
    cosineSimilarity,
    alignedData1: smoothData(alignedData1, smoothingWindow),
    alignedData2: smoothData(alignedData2, smoothingWindow),
  };
};
