| | """ |
| | Statistical Analysis Module |
| | |
| | Provides rigorous statistical tools for hypothesis testing: |
| | - Paired t-tests for within-subject designs |
| | - Effect sizes (Cohen's d) |
| | - Confidence intervals |
| | - Multiple comparison correction |
| | - Power analysis |
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | from dataclasses import dataclass |
| | from typing import List, Tuple, Dict, Any, Optional |
| | import numpy as np |
| | from scipy import stats |
| |
|
| |
|
@dataclass
class StatisticalResult:
    """
    Result of a statistical test.

    Bundles everything needed for scientific reporting:
    - Test statistic and p-value
    - Effect size with interpretation
    - Confidence interval
    - Sample statistics
    """
    test_name: str          # Human-readable name of the test performed
    statistic: float        # Test statistic (e.g. the t value)
    p_value: float          # p-value under the alternative used by the test
    effect_size: float      # Point estimate of the effect size
    effect_size_name: str   # e.g. "Cohen's d"
    ci_lower: float         # Lower bound of the CI for the mean difference
    ci_upper: float         # Upper bound of the CI for the mean difference
    confidence_level: float # e.g. 0.95 for a 95% interval
    n: int                  # Total number of observations used by the test
    mean_diff: float        # Observed mean difference
    std_diff: float         # Std. dev. of the differences (or scaled SE)
    significant: bool       # True when p_value < alpha at test time
    interpretation: str     # Verbal effect-size interpretation

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "test_name": self.test_name,
            "statistic": self.statistic,
            "p_value": self.p_value,
            "effect_size": self.effect_size,
            "effect_size_name": self.effect_size_name,
            "ci_lower": self.ci_lower,
            "ci_upper": self.ci_upper,
            "confidence_level": self.confidence_level,
            "n": self.n,
            "mean_diff": self.mean_diff,
            "std_diff": self.std_diff,
            "significant": self.significant,
            "interpretation": self.interpretation,
        }

    def __str__(self) -> str:
        """Human-readable one-line summary.

        Bug fixed: the CI label previously said "95%" regardless of
        confidence_level; it now reflects the actual level.
        """
        sig_marker = "*" if self.significant else ""
        # NOTE(review): df is shown as n-1, which is exact for paired /
        # one-sample tests; Welch's test has a different (fractional) df —
        # confirm before citing the printed df for independent tests.
        return (
            f"{self.test_name}: t({self.n-1})={self.statistic:.3f}, "
            f"p={self.p_value:.4f}{sig_marker}, "
            f"{self.effect_size_name}={self.effect_size:.3f}, "
            f"{self.confidence_level:.0%} CI [{self.ci_lower:.4f}, {self.ci_upper:.4f}]"
        )
| |
|
| |
|
def paired_ttest(
    condition1: List[float],
    condition2: List[float],
    alpha: float = 0.05,
    alternative: str = "two-sided",
) -> StatisticalResult:
    """
    Run a paired-samples t-test on two within-subject conditions.

    Tests H0: mu1 == mu2 against the requested alternative.

    Args:
        condition1: Scores from the first condition.
        condition2: Scores from the second condition (same subjects, same order).
        alpha: Significance level.
        alternative: "two-sided", "greater", or "less".

    Returns:
        StatisticalResult bundling the statistic, p-value, Cohen's d,
        and a (1 - alpha) two-sided CI for the mean difference.

    Raises:
        ValueError: If the conditions differ in length or have fewer than 2 pairs.
    """
    if len(condition1) != len(condition2):
        raise ValueError("Conditions must have same length for paired test")

    n = len(condition1)
    if n < 2:
        raise ValueError("Need at least 2 observations")

    arr1 = np.array(condition1)
    arr2 = np.array(condition2)
    diffs = arr1 - arr2

    test = stats.ttest_rel(arr1, arr2, alternative=alternative)
    cohens_d = compute_effect_size(condition1, condition2, paired=True)

    # CI for the mean difference (always two-sided, even when the test is one-sided).
    avg_diff = np.mean(diffs)
    sd_diff = np.std(diffs, ddof=1)
    std_err = sd_diff / np.sqrt(n)
    t_crit = stats.t.ppf(1 - alpha / 2, df=n - 1)

    return StatisticalResult(
        test_name="Paired t-test",
        statistic=float(test.statistic),
        p_value=float(test.pvalue),
        effect_size=float(cohens_d),
        effect_size_name="Cohen's d",
        ci_lower=float(avg_diff - t_crit * std_err),
        ci_upper=float(avg_diff + t_crit * std_err),
        confidence_level=1 - alpha,
        n=n,
        mean_diff=float(avg_diff),
        std_diff=float(sd_diff),
        significant=test.pvalue < alpha,
        interpretation=_interpret_effect_size(cohens_d),
    )
| |
|
| |
|
def independent_ttest(
    group1: List[float],
    group2: List[float],
    alpha: float = 0.05,
    equal_var: bool = False,
    alternative: str = "two-sided",
) -> StatisticalResult:
    """
    Run an independent-samples t-test (Welch's test by default).

    Args:
        group1: Scores from the first group.
        group2: Scores from the second group.
        alpha: Significance level.
        equal_var: Assume equal variances (Student's test); Welch's when False.
        alternative: "two-sided", "greater", or "less".

    Returns:
        StatisticalResult with the statistic, p-value, Cohen's d, and a
        (1 - alpha) two-sided CI for the difference of group means.
    """
    arr1 = np.array(group1)
    arr2 = np.array(group2)

    test = stats.ttest_ind(arr1, arr2, equal_var=equal_var, alternative=alternative)
    cohens_d = compute_effect_size(group1, group2, paired=False)

    diff_means = np.mean(arr1) - np.mean(arr2)
    size1, size2 = len(arr1), len(arr2)

    if equal_var:
        # Student's test: pooled variance, exact integer df.
        pooled_var = ((size1 - 1) * np.var(arr1, ddof=1) + (size2 - 1) * np.var(arr2, ddof=1)) / (size1 + size2 - 2)
        std_err = np.sqrt(pooled_var * (1/size1 + 1/size2))
        dof = size1 + size2 - 2
    else:
        # Welch's test: separate variances, Welch-Satterthwaite df.
        v1, v2 = np.var(arr1, ddof=1), np.var(arr2, ddof=1)
        std_err = np.sqrt(v1/size1 + v2/size2)
        dof = (v1/size1 + v2/size2)**2 / ((v1/size1)**2/(size1-1) + (v2/size2)**2/(size2-1))

    t_crit = stats.t.ppf(1 - alpha / 2, df=dof)

    return StatisticalResult(
        test_name="Independent t-test" + ("" if equal_var else " (Welch's)"),
        statistic=float(test.statistic),
        p_value=float(test.pvalue),
        effect_size=float(cohens_d),
        effect_size_name="Cohen's d",
        ci_lower=float(diff_means - t_crit * std_err),
        ci_upper=float(diff_means + t_crit * std_err),
        confidence_level=1 - alpha,
        n=size1 + size2,
        mean_diff=float(diff_means),
        std_diff=float(std_err * np.sqrt(size1 + size2)),
        significant=test.pvalue < alpha,
        interpretation=_interpret_effect_size(cohens_d),
    )
| |
|
| |
|
def compute_effect_size(
    group1: List[float],
    group2: List[float],
    paired: bool = True,
) -> float:
    """
    Compute Cohen's d.

    Paired data uses d = mean(diff) / sd(diff); independent groups use
    the pooled-standard-deviation form d = (mean1 - mean2) / s_pooled.

    Args:
        group1: First group/condition scores.
        group2: Second group/condition scores.
        paired: Whether the two sequences are paired observations.

    Returns:
        Cohen's d as a plain float.
    """
    a = np.array(group1)
    b = np.array(group2)

    if paired:
        delta = a - b
        return float(np.mean(delta) / np.std(delta, ddof=1))

    size_a, size_b = len(a), len(b)
    pooled_sd = np.sqrt(
        ((size_a - 1) * np.var(a, ddof=1) + (size_b - 1) * np.var(b, ddof=1))
        / (size_a + size_b - 2)
    )
    return float((np.mean(a) - np.mean(b)) / pooled_sd)
| |
|
| |
|
| | def _interpret_effect_size(d: float) -> str: |
| | """Interpret Cohen's d according to conventional thresholds.""" |
| | abs_d = abs(d) |
| | if abs_d < 0.2: |
| | size = "negligible" |
| | elif abs_d < 0.5: |
| | size = "small" |
| | elif abs_d < 0.8: |
| | size = "medium" |
| | else: |
| | size = "large" |
| |
|
| | direction = "positive" if d > 0 else "negative" if d < 0 else "no" |
| | return f"{size} {direction} effect" |
| |
|
| |
|
def compute_confidence_interval(
    data: List[float],
    confidence: float = 0.95,
) -> Tuple[float, float, float]:
    """
    t-based confidence interval for the sample mean.

    Args:
        data: Sample data.
        confidence: Confidence level (default 0.95 for a 95% CI).

    Returns:
        Tuple of (mean, ci_lower, ci_upper).
    """
    sample = np.array(data)
    center = np.mean(sample)
    std_err = stats.sem(sample)

    # Two-sided critical value from the t distribution with n-1 df.
    t_crit = stats.t.ppf(1 - (1 - confidence) / 2, df=len(sample) - 1)
    half_width = t_crit * std_err

    return float(center), float(center - half_width), float(center + half_width)
| |
|
| |
|
def bonferroni_correction(
    p_values: List[float],
    alpha: float = 0.05,
) -> Tuple[float, List[bool]]:
    """
    Apply Bonferroni correction for multiple comparisons.

    Each test is evaluated at alpha / m, controlling the family-wise
    error rate at alpha.

    Args:
        p_values: List of p-values from multiple tests
        alpha: Family-wise error rate

    Returns:
        Tuple of (corrected_alpha, list of significant results)
    """
    n_tests = len(p_values)
    # Guard: an empty family previously crashed with ZeroDivisionError;
    # with no tests there is nothing to correct, so alpha is unchanged.
    if n_tests == 0:
        return alpha, []

    corrected_alpha = alpha / n_tests
    significant = [p < corrected_alpha for p in p_values]

    return corrected_alpha, significant
| |
|
| |
|
def holm_bonferroni_correction(
    p_values: List[float],
    alpha: float = 0.05,
) -> Tuple[List[float], List[bool]]:
    """
    Apply Holm-Bonferroni (step-down) correction.

    More powerful than standard Bonferroni while controlling FWER.

    The adjusted p-value for the i-th smallest raw p (0-indexed) is
        max_{j <= i} (m - j) * p_(j),  capped at 1,
    i.e. a running MAXIMUM over the sorted, multiplier-scaled p-values.

    Bug fixed: the previous implementation applied a running minimum to a
    mis-permuted array, which produced anti-conservative (too small)
    adjusted p-values whenever the input order mattered.

    Args:
        p_values: List of p-values from multiple tests
        alpha: Family-wise error rate

    Returns:
        Tuple of (adjusted_p_values, list of significant results)
    """
    m = len(p_values)
    if m == 0:
        return [], []

    order = np.argsort(p_values)
    sorted_p = np.asarray(p_values, dtype=float)[order]

    # Step-down multipliers m, m-1, ..., 1, then enforce monotone
    # non-decreasing adjusted p-values with a running maximum.
    scaled = sorted_p * (m - np.arange(m))
    adjusted_sorted = np.minimum(np.maximum.accumulate(scaled), 1.0)

    # Undo the sort so adjusted values line up with the input order.
    adjusted = np.empty(m)
    adjusted[order] = adjusted_sorted

    significant = [p < alpha for p in adjusted]
    return list(adjusted), significant
| |
|
| |
|
def power_analysis_paired_ttest(
    effect_size: float,
    alpha: float = 0.05,
    power: float = 0.80,
) -> int:
    """
    Compute required sample size for a paired t-test.

    Uses the two-sided normal approximation
        n = ((z_{1-alpha/2} + z_{power}) / d)^2,
    which slightly underestimates the exact t-based sample size for
    small n.

    Args:
        effect_size: Expected Cohen's d (must be non-zero)
        alpha: Significance level
        power: Desired statistical power

    Returns:
        Required sample size (N), rounded up.

    Raises:
        ValueError: If effect_size is zero (previously this surfaced as a
            bare ZeroDivisionError).
    """
    from scipy.stats import norm

    if effect_size == 0:
        raise ValueError("effect_size must be non-zero for power analysis")

    z_alpha = norm.ppf(1 - alpha / 2)
    z_beta = norm.ppf(power)

    n = ((z_alpha + z_beta) / effect_size) ** 2

    return int(np.ceil(n))
| |
|
| |
|
| | def bootstrap_ci( |
| | data: List[float], |
| | n_bootstrap: int = 10000, |
| | confidence: float = 0.95, |
| | statistic: str = "mean", |
| | seed: int = 42, |
| | ) -> Dict[str, float]: |
| | """ |
| | Compute bootstrap confidence interval. |
| | |
| | Non-parametric CI that makes no distributional assumptions. |
| | Uses BCa (bias-corrected and accelerated) percentile method. |
| | |
| | Args: |
| | data: Sample data |
| | n_bootstrap: Number of bootstrap resamples |
| | confidence: Confidence level (default 0.95 for 95% CI) |
| | statistic: "mean" or "median" |
| | seed: Random seed for reproducibility |
| | |
| | Returns: |
| | Dictionary with point estimate, ci_lower, ci_upper, se |
| | """ |
| | arr = np.array(data) |
| | n = len(arr) |
| | rng = np.random.default_rng(seed) |
| |
|
| | stat_fn = np.mean if statistic == "mean" else np.median |
| | observed = float(stat_fn(arr)) |
| |
|
| | |
| | boot_stats = np.empty(n_bootstrap) |
| | for i in range(n_bootstrap): |
| | sample = rng.choice(arr, size=n, replace=True) |
| | boot_stats[i] = stat_fn(sample) |
| |
|
| | |
| | z0 = stats.norm.ppf(np.mean(boot_stats < observed)) |
| |
|
| | |
| | jackknife_stats = np.empty(n) |
| | for i in range(n): |
| | jack_sample = np.delete(arr, i) |
| | jackknife_stats[i] = stat_fn(jack_sample) |
| | jack_mean = np.mean(jackknife_stats) |
| | num = np.sum((jack_mean - jackknife_stats) ** 3) |
| | den = 6 * (np.sum((jack_mean - jackknife_stats) ** 2) ** 1.5) |
| | a = num / den if den != 0 else 0.0 |
| |
|
| | |
| | alpha = 1 - confidence |
| | z_lower = stats.norm.ppf(alpha / 2) |
| | z_upper = stats.norm.ppf(1 - alpha / 2) |
| |
|
| | p_lower = stats.norm.cdf(z0 + (z0 + z_lower) / (1 - a * (z0 + z_lower))) |
| | p_upper = stats.norm.cdf(z0 + (z0 + z_upper) / (1 - a * (z0 + z_upper))) |
| |
|
| | |
| | p_lower = np.clip(p_lower, 0.001, 0.999) |
| | p_upper = np.clip(p_upper, 0.001, 0.999) |
| |
|
| | ci_lower = float(np.percentile(boot_stats, p_lower * 100)) |
| | ci_upper = float(np.percentile(boot_stats, p_upper * 100)) |
| |
|
| | return { |
| | "estimate": observed, |
| | "ci_lower": ci_lower, |
| | "ci_upper": ci_upper, |
| | "se": float(np.std(boot_stats)), |
| | "confidence": confidence, |
| | "n_bootstrap": n_bootstrap, |
| | "method": "BCa bootstrap", |
| | } |
| |
|
| |
|
def bootstrap_ci_diff(
    group1: List[float],
    group2: List[float],
    n_bootstrap: int = 10000,
    confidence: float = 0.95,
    paired: bool = True,
    seed: int = 42,
) -> Dict[str, float]:
    """
    Percentile-bootstrap CI for the difference between two groups.

    Args:
        group1: First group scores.
        group2: Second group scores.
        n_bootstrap: Number of bootstrap resamples.
        confidence: Confidence level.
        paired: Resample per-subject differences (True) or each group
            independently (False).
        seed: Random seed for reproducibility.

    Returns:
        Dict with mean_diff, ci_lower, ci_upper, se, and metadata.
    """
    arr1 = np.array(group1)
    arr2 = np.array(group2)
    rng = np.random.default_rng(seed)

    if paired:
        # Resample the per-subject differences directly.
        deltas = arr1 - arr2
        count = len(deltas)
        observed = float(np.mean(deltas))
        boot = np.array([
            np.mean(rng.choice(deltas, size=count, replace=True))
            for _ in range(n_bootstrap)
        ])
    else:
        # Resample each group independently.
        count1, count2 = len(arr1), len(arr2)
        observed = float(np.mean(arr1) - np.mean(arr2))
        boot = np.array([
            np.mean(rng.choice(arr1, size=count1, replace=True))
            - np.mean(rng.choice(arr2, size=count2, replace=True))
            for _ in range(n_bootstrap)
        ])

    tail = (1 - confidence) / 2
    return {
        "mean_diff": observed,
        "ci_lower": float(np.percentile(boot, tail * 100)),
        "ci_upper": float(np.percentile(boot, (1 - tail) * 100)),
        "se": float(np.std(boot)),
        "confidence": confidence,
        "n_bootstrap": n_bootstrap,
    }
| |
|
| |
|
def descriptive_stats(data: List[float]) -> Dict[str, float]:
    """
    Summarize a sample: central tendency, spread, range, and a bootstrap CI.

    Args:
        data: Sample data.

    Returns:
        Dict with n, mean, std (ddof=1), median, min, max, standard error,
        and 95% bootstrap CI bounds.
    """
    values = np.array(data)
    # 95% BCa bootstrap interval for the mean (bootstrap_ci defaults).
    ci = bootstrap_ci(list(values))
    summary: Dict[str, float] = {
        "n": len(values),
        "mean": float(np.mean(values)),
        "std": float(np.std(values, ddof=1)),
        "median": float(np.median(values)),
        "min": float(np.min(values)),
        "max": float(np.max(values)),
        "se": float(stats.sem(values)),
        "ci_lower_95": ci["ci_lower"],
        "ci_upper_95": ci["ci_upper"],
    }
    return summary
| |
|
| |
|
def shapiro_wilk_test(
    data: List[float],
    alpha: float = 0.05,
) -> Dict[str, Any]:
    """
    Run the Shapiro-Wilk test for normality.

    Args:
        data: Sample data (use paired differences when screening for a
            paired test).
        alpha: Significance level.

    Returns:
        Dict with the W statistic, p-value, alpha, and a "normal" flag
        (True when the test fails to reject normality at alpha).
    """
    w_stat, p_val = stats.shapiro(np.array(data))
    return {
        "test_name": "Shapiro-Wilk",
        "W": float(w_stat),
        "p_value": float(p_val),
        "normal": bool(p_val > alpha),
        "alpha": alpha,
    }
| |
|
| |
|
def wilcoxon_signed_rank(
    condition1: List[float],
    condition2: List[float],
    alpha: float = 0.05,
    alternative: str = "two-sided",
) -> Dict[str, Any]:
    """
    Wilcoxon signed-rank test (non-parametric alternative to paired t-test).

    Args:
        condition1: Scores from first condition
        condition2: Scores from second condition (same subjects)
        alpha: Significance level
        alternative: "two-sided", "greater", or "less"

    Returns:
        Dictionary with test statistic, p-value, and rank-biserial correlation
    """
    c1 = np.array(condition1)
    c2 = np.array(condition2)
    diff = c1 - c2

    # scipy's wilcoxon drops zero differences by default (zero_method="wilcox");
    # raises ValueError if all differences are zero.
    result = stats.wilcoxon(diff, alternative=alternative)
    # n counts ALL pairs, including any zero differences scipy discarded.
    # NOTE(review): if zeros are present, the rank-sum total n*(n+1)/2 below
    # overstates the effective total — confirm inputs have no ties at zero.
    n = len(diff)

    # Rank-biserial correlation r = 1 - 2T / (n(n+1)/2).
    # NOTE(review): for the default two-sided alternative, scipy's statistic
    # is min(R+, R-), so this r is always >= 0 and loses the sign/direction
    # of the effect; it is directional only for one-sided alternatives.
    r_rb = 1 - (2 * float(result.statistic)) / (n * (n + 1) / 2)

    return {
        "test_name": "Wilcoxon signed-rank",
        "statistic": float(result.statistic),
        "p_value": float(result.pvalue),
        "effect_size": float(r_rb),
        "effect_size_name": "rank-biserial r",
        "n": n,
        "significant": bool(result.pvalue < alpha),
        "alpha": alpha,
    }
| |
|
| |
|
def cohens_d_ci(
    d: float,
    n: int,
    alpha: float = 0.05,
) -> Dict[str, float]:
    """
    Normal-approximation confidence interval for Cohen's d.

    Uses SE(d) = sqrt(1/n + d^2 / (2n)) with a z critical value.

    Args:
        d: Cohen's d point estimate.
        n: Sample size (number of pairs for paired designs).
        alpha: Significance level (0.05 gives a 95% interval).

    Returns:
        Dict with d, ci_lower, ci_upper, and se.
    """
    std_err = np.sqrt(1 / n + d**2 / (2 * n))
    z_crit = stats.norm.ppf(1 - alpha / 2)
    half_width = z_crit * std_err
    return {
        "d": float(d),
        "ci_lower": float(d - half_width),
        "ci_upper": float(d + half_width),
        "se": float(std_err),
    }
| |
|
| |
|
def compare_all_pairs(
    conditions: Dict[str, List[float]],
    alpha: float = 0.05,
    paired: bool = True,
    correction: str = "holm",
) -> Dict[str, StatisticalResult]:
    """
    Compare every pair of conditions with multiple-comparison correction.

    Args:
        conditions: Mapping from condition name to its scores.
        alpha: Family-wise error rate.
        paired: Use paired t-tests (True) or independent t-tests (False).
        correction: "bonferroni" or "holm" (default).

    Returns:
        Dict keyed "<name1>_vs_<name2>" of corrected StatisticalResults.
    """
    names = list(conditions.keys())
    test_fn = paired_ttest if paired else independent_ttest

    results: Dict[str, StatisticalResult] = {}
    keys: List[str] = []
    raw_p: List[float] = []

    # Run every pairwise comparison once (upper triangle).
    for i, first in enumerate(names):
        for second in names[i + 1:]:
            key = f"{first}_vs_{second}"
            outcome = test_fn(conditions[first], conditions[second], alpha=alpha)
            results[key] = outcome
            keys.append(key)
            raw_p.append(outcome.p_value)

    # Family-wise correction of the raw p-values.
    if correction == "bonferroni":
        _, flags = bonferroni_correction(raw_p, alpha)
        adj_p = [p * len(raw_p) for p in raw_p]
    else:
        adj_p, flags = holm_bonferroni_correction(raw_p, alpha)

    # Rebuild each result with the adjusted p-value and corrected verdict.
    for key, p_adj, flag in zip(keys, adj_p, flags):
        base = results[key]
        results[key] = StatisticalResult(
            test_name=base.test_name + f" ({correction}-corrected)",
            statistic=base.statistic,
            p_value=min(p_adj, 1.0),
            effect_size=base.effect_size,
            effect_size_name=base.effect_size_name,
            ci_lower=base.ci_lower,
            ci_upper=base.ci_upper,
            confidence_level=base.confidence_level,
            n=base.n,
            mean_diff=base.mean_diff,
            std_diff=base.std_diff,
            significant=flag,
            interpretation=base.interpretation,
        )

    return results
| |
|
| |
|
def spearman_correlation(
    x: List[float],
    y: List[float],
    alpha: float = 0.05,
) -> Dict[str, Any]:
    """
    Compute Spearman rank correlation with a Fisher-z confidence interval.

    Fixes: rho == 0 was previously labeled a "negative" correlation; the
    result is unpacked as a tuple (robust across scipy versions, where the
    `.correlation` attribute is deprecated in favor of `.statistic`); the
    "significant" flag is now a plain bool for JSON serialization,
    matching shapiro_wilk_test.

    Args:
        x: First variable
        y: Second variable
        alpha: Significance level

    Returns:
        Dictionary with correlation, p-value, CI, and interpretation
    """
    # Tuple unpacking works on every scipy version's result object.
    rho, p = stats.spearmanr(x, y)
    n = len(x)

    # Fisher z-transform CI: SE = 1/sqrt(n - 3), so n >= 4 is required for
    # a finite interval; |rho| == 1 also yields infinite z (bounds become ±1).
    z = np.arctanh(rho)
    se_z = 1 / np.sqrt(n - 3)
    z_crit = stats.norm.ppf(1 - alpha / 2)
    ci_lower = np.tanh(z - z_crit * se_z)
    ci_upper = np.tanh(z + z_crit * se_z)

    # Conventional strength labels for |rho|.
    abs_rho = abs(rho)
    if abs_rho < 0.1:
        strength = "negligible"
    elif abs_rho < 0.3:
        strength = "weak"
    elif abs_rho < 0.5:
        strength = "moderate"
    elif abs_rho < 0.7:
        strength = "strong"
    else:
        strength = "very strong"

    # rho == 0 is "no" correlation, not "negative" (mirrors _interpret_effect_size).
    direction = "positive" if rho > 0 else "negative" if rho < 0 else "no"
    significant = bool(p < alpha)

    return {
        "rho": float(rho),
        "p_value": float(p),
        "n": n,
        "ci_lower": float(ci_lower),
        "ci_upper": float(ci_upper),
        "confidence_level": 1 - alpha,
        "significant": significant,
        "interpretation": f"{'Significant' if significant else 'Non-significant'} {strength} {direction} correlation",
    }
| |
|