Google ADK Multi-Agent Pipeline Tutorial: Data Loading, Statistical Testing, Visualization, and Report Generation in Python

def describe_dataset(dataset_name: str, tool_context: ToolContext) -> dict:
    """Build a descriptive summary of a dataset held in ``DATA_STORE``.

    Args:
        dataset_name: Name used to look the DataFrame up in ``DATA_STORE``.
        tool_context: Not read by this function (presumably injected by the
            ADK runtime -- confirm against the agent wiring).

    Returns:
        On success, the result of ``make_serializable`` applied to a dict with:

        * ``status``: ``"success"``
        * ``dataset``: the requested name
        * ``overview``: row/column counts, numeric and categorical column
          names, deep memory usage in MB, duplicate-row count and total
          missing-cell count
        * ``numeric_summary`` (only if numeric columns exist): per-column
          count, mean, std, min, quartiles, max, skewness and missing count.
          Columns with no non-null values are omitted.
        * ``categorical_summary`` (only if object/category columns exist):
          unique-value count, the five most frequent values and the missing
          count for at most the first ten such columns.

        If the dataset is not found, a plain dict with ``status == "error"``
        and a ``message``.
    """
    print(f"📊 Describing dataset: {dataset_name}")

    df = DATA_STORE.get_dataset(dataset_name)
    if df is None:
        return {"status": "error", "message": f"Dataset '{dataset_name}' not found"}

    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(include=["object", "category"]).columns.tolist()

    result = {
        "status": "success",
        "dataset": dataset_name,
        "overview": {
            "total_rows": int(len(df)),
            "total_columns": int(len(df.columns)),
            "numeric_columns": numeric_cols,
            "categorical_columns": categorical_cols,
            # deep=True so object (string) columns are measured by content.
            "memory_mb": round(float(df.memory_usage(deep=True).sum() / 1024 / 1024), 2),
            "duplicate_rows": int(df.duplicated().sum()),
            "missing_total": int(df.isnull().sum().sum()),
        },
    }

    if numeric_cols:
        stats_dict = {}
        for col in numeric_cols:
            col_data = df[col].dropna()
            if col_data.empty:
                # Nothing to summarise for an all-null column.
                continue
            stats_dict[col] = {
                "count": int(len(col_data)),
                "mean": round(float(col_data.mean()), 3),
                "std": round(float(col_data.std()), 3),
                "min": round(float(col_data.min()), 3),
                "25%": round(float(col_data.quantile(0.25)), 3),
                "50%": round(float(col_data.median()), 3),
                "75%": round(float(col_data.quantile(0.75)), 3),
                "max": round(float(col_data.max()), 3),
                "skewness": round(float(col_data.skew()), 3),
                # Counted on the original column, before dropna().
                "missing": int(df[col].isnull().sum()),
            }
        result["numeric_summary"] = stats_dict

    if categorical_cols:
        cat_dict = {}
        # Cap at ten columns to keep the payload small.
        for col in categorical_cols[:10]:
            vc = df[col].value_counts()
            cat_dict[col] = {
                "unique_values": int(df[col].nunique()),
                "top_values": {str(k): int(v) for k, v in vc.head(5).items()},
                "missing": int(df[col].isnull().sum()),
            }
        result["categorical_summary"] = cat_dict

    DATA_STORE.log_analysis("describe", dataset_name, "Statistics generated")
    return make_serializable(result)

def correlation_analysis(dataset_name: str, technique: str = "pearson", tool_context: ToolContext = None) -> dict:
    """Compute the pairwise correlation matrix of a dataset's numeric columns.

    Args:
        dataset_name: Name used to look the DataFrame up in ``DATA_STORE``.
        technique: Correlation method forwarded to ``DataFrame.corr`` as its
            ``method`` argument: ``"pearson"``, ``"kendall"`` or ``"spearman"``.
        tool_context: Not read by this function.

    Returns:
        On success, the result of ``make_serializable`` applied to a dict with
        ``status``, ``technique``, the full ``correlation_matrix`` (values
        rounded to 3 places), up to ten ``strong_correlations`` (pairs with
        ``|r| > 0.5``, sorted by descending ``|r|`` and labelled "strong" above
        0.7, otherwise "moderate") and an ``insight`` sentence.

        A plain dict with ``status == "error"`` when the dataset is missing,
        fewer than two numeric columns exist, or ``technique`` is not one of
        the supported names.
    """
    print(f"📊 Correlation analysis: {dataset_name} ({technique})")

    df = DATA_STORE.get_dataset(dataset_name)
    if df is None:
        return {"status": "error", "message": f"Dataset '{dataset_name}' not found"}

    numeric_df = df.select_dtypes(include=[np.number])

    if numeric_df.shape[1] < 2:
        return {"status": "error", "message": "Need at least 2 numeric columns"}

    # Reject unsupported names here so the caller gets an error dict, as with
    # the other tools, rather than an exception raised from inside pandas.
    if technique not in ("pearson", "kendall", "spearman"):
        return {
            "status": "error",
            "message": f"Unknown technique: {technique}. Use: pearson, kendall, spearman",
        }

    corr_matrix = numeric_df.corr(method=technique)
    columns = corr_matrix.columns

    strong_corrs = []
    # Walk the upper triangle only so each unordered pair is reported once.
    for i in range(len(columns)):
        for j in range(i + 1, len(columns)):
            val = corr_matrix.iloc[i, j]
            # NaN (e.g. a constant column) compares False and is skipped.
            if abs(val) > 0.5:
                strong_corrs.append({
                    "var1": columns[i],
                    "var2": columns[j],
                    "correlation": round(float(val), 3),
                    "strength": "strong" if abs(val) > 0.7 else "moderate",
                })

    strong_corrs.sort(key=lambda x: abs(x["correlation"]), reverse=True)

    corr_dict = {
        col: {k: round(float(v), 3) for k, v in corr_matrix[col].items()}
        for col in columns
    }

    DATA_STORE.log_analysis("correlation", dataset_name, f"{technique} correlation")

    return make_serializable({
        "status": "success",
        "technique": technique,
        "correlation_matrix": corr_dict,
        "strong_correlations": strong_corrs[:10],
        # The count covers every qualifying pair, not just the ten returned.
        "insight": f"Found {len(strong_corrs)} pairs with |correlation| > 0.5",
    })

def hypothesis_test(dataset_name: str, test_type: str, column1: str,
                    column2: str = None, group_column: str = None,
                    tool_context: ToolContext = None) -> dict:
    """Run one statistical hypothesis test on a dataset held in ``DATA_STORE``.

    Args:
        dataset_name: Name used to look the DataFrame up in ``DATA_STORE``.
        test_type: One of ``"normality"`` (Shapiro-Wilk on ``column1``),
            ``"ttest"`` (independent two-sample t-test of ``column1`` across
            the two values of ``group_column``), ``"anova"`` (one-way ANOVA of
            ``column1`` across ``group_column``) or ``"chi2"`` (chi-square test
            of independence between ``column1`` and ``column2``).
        column1: Column under test; must exist in the dataset.
        column2: Second categorical column; required for ``"chi2"``.
        group_column: Grouping column; required for ``"ttest"`` and ``"anova"``.
        tool_context: Not read by this function.

    Returns:
        On success, the result of ``make_serializable`` applied to a dict with
        ``status == "success"``, the test name, the statistic, ``p_value`` and
        an interpretation at the 0.05 level (plus test-specific fields).
        Otherwise a plain dict with ``status == "error"`` and a ``message``.
        Any exception raised while running a test is caught and reported in
        that error form rather than propagated.
    """
    print(f"📊 Hypothesis test: {test_type} on {dataset_name}")

    df = DATA_STORE.get_dataset(dataset_name)
    if df is None:
        return {"status": "error", "message": f"Dataset '{dataset_name}' not found"}

    if column1 not in df.columns:
        return {"status": "error", "message": f"Column '{column1}' not found"}

    try:
        if test_type == "normality":
            data = df[column1].dropna()
            if len(data) > 5000:
                # Subsample large columns; the seed keeps repeated calls on
                # the same data reproducible.
                data = data.sample(5000, random_state=42)
            stat, p = stats.shapiro(data)

            return make_serializable({
                "status": "success",
                "test": "Shapiro-Wilk Normality Test",
                "column": column1,
                "statistic": round(float(stat), 4),
                "p_value": round(float(p), 6),
                "is_normal": bool(p > 0.05),
                "interpretation": "Data appears normally distributed" if p > 0.05 else "Data is NOT normally distributed",
            })

        elif test_type == "ttest":
            if group_column is None:
                return {"status": "error", "message": "group_column required for t-test"}
            if group_column not in df.columns:
                return {"status": "error", "message": f"Column '{group_column}' not found"}

            groups = df[group_column].dropna().unique()
            if len(groups) != 2:
                return {"status": "error", "message": f"T-test needs exactly 2 groups, found {len(groups)}: {list(groups)}"}

            g1 = df[df[group_column] == groups[0]][column1].dropna()
            g2 = df[df[group_column] == groups[1]][column1].dropna()

            stat, p = stats.ttest_ind(g1, g2)

            return make_serializable({
                "status": "success",
                "test": "Independent Samples T-Test",
                "comparing": column1,
                "group1": {"name": str(groups[0]), "mean": round(float(g1.mean()), 3), "n": int(len(g1))},
                "group2": {"name": str(groups[1]), "mean": round(float(g2.mean()), 3), "n": int(len(g2))},
                "t_statistic": round(float(stat), 4),
                "p_value": round(float(p), 6),
                "significant": bool(p < 0.05),
                "interpretation": "Significant difference" if p < 0.05 else "No significant difference",
            })

        elif test_type == "anova":
            if group_column is None:
                return {"status": "error", "message": "group_column required for ANOVA"}
            if group_column not in df.columns:
                return {"status": "error", "message": f"Column '{group_column}' not found"}

            # Take names and samples from the same groupby so they cannot
            # disagree (groupby drops NaN keys, whereas unique() keeps them).
            # Groups with no usable values are skipped because an empty sample
            # makes f_oneway return NaN.
            grouped = {}
            for name, grp in df.groupby(group_column):
                values = grp[column1].dropna()
                if not values.empty:
                    grouped[name] = values

            if len(grouped) < 2:
                return {"status": "error", "message": f"ANOVA needs at least 2 non-empty groups, found {len(grouped)}"}

            stat, p = stats.f_oneway(*(values.values for values in grouped.values()))

            group_stats = [
                {
                    "group": str(name),
                    "mean": round(float(values.mean()), 3),
                    "std": round(float(values.std()), 3),
                    "n": int(len(values)),
                }
                for name, values in grouped.items()
            ]

            return make_serializable({
                "status": "success",
                "test": "One-Way ANOVA",
                "comparing": column1,
                "across": group_column,
                "n_groups": int(len(grouped)),
                "group_statistics": group_stats,
                "f_statistic": round(float(stat), 4),
                "p_value": round(float(p), 6),
                "significant": bool(p < 0.05),
                "interpretation": "Significant differences among groups" if p < 0.05 else "No significant differences",
            })

        elif test_type == "chi2":
            if column2 is None:
                return {"status": "error", "message": "column2 required for chi-square test"}
            if column2 not in df.columns:
                return {"status": "error", "message": f"Column '{column2}' not found"}

            contingency = pd.crosstab(df[column1], df[column2])
            chi2, p, dof, _ = stats.chi2_contingency(contingency)

            return make_serializable({
                "status": "success",
                "test": "Chi-Square Test of Independence",
                "variables": [column1, column2],
                "chi2_statistic": round(float(chi2), 4),
                "p_value": round(float(p), 6),
                "degrees_of_freedom": int(dof),
                "significant": bool(p < 0.05),
                "interpretation": "Variables are dependent" if p < 0.05 else "Variables are independent",
            })

        else:
            return {"status": "error", "message": f"Unknown test: {test_type}. Use: normality, ttest, anova, chi2"}

    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        return {"status": "error", "message": f"Test failed: {str(e)}"}

def outlier_detection(dataset_name: str, column: str, technique: str = "iqr",
                      tool_context: ToolContext = None) -> dict:
    """Flag outliers in one column using the IQR rule or z-scores.

    Args:
        dataset_name: Name used to look the DataFrame up in ``DATA_STORE``.
        column: Column to inspect; must exist in the dataset.
        technique: ``"iqr"`` flags values outside
            ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``; ``"zscore"`` flags values
            whose absolute z-score exceeds 3.
        tool_context: Not read by this function.

    Returns:
        On success, the result of ``make_serializable`` applied to a dict with
        ``status``, ``technique``, ``column``, ``total_values`` (non-null
        count), ``outlier_count``, ``outlier_pct`` and up to ten
        ``outlier_examples``; the IQR variant also includes ``bounds`` and
        ``iqr``. A plain dict with ``status == "error"`` when the dataset or
        column is missing, ``technique`` is unknown, the column has no
        non-null values, or the values cannot be processed numerically.
    """
    print(f"📊 Outlier detection: {column} in {dataset_name}")

    df = DATA_STORE.get_dataset(dataset_name)
    if df is None:
        return {"status": "error", "message": f"Dataset '{dataset_name}' not found"}

    if column not in df.columns:
        return {"status": "error", "message": f"Column '{column}' not found"}

    if technique not in ("iqr", "zscore"):
        return {"status": "error", "message": f"Unknown technique: {technique}. Use: iqr, zscore"}

    data = df[column].dropna()
    if data.empty:
        # Avoids a division by zero in the outlier percentage below.
        return {"status": "error", "message": f"Column '{column}' has no non-null values"}

    try:
        if technique == "iqr":
            q1 = float(data.quantile(0.25))
            q3 = float(data.quantile(0.75))
            iqr = q3 - q1
            lower = q1 - 1.5 * iqr
            upper = q3 + 1.5 * iqr
            outliers = data[(data < lower) | (data > upper)]

            return make_serializable({
                "status": "success",
                "technique": "IQR (Interquartile Range)",
                "column": column,
                "bounds": {"lower": round(lower, 3), "upper": round(upper, 3)},
                "iqr": round(iqr, 3),
                "total_values": int(len(data)),
                "outlier_count": int(len(outliers)),
                "outlier_pct": round(float(len(outliers) / len(data) * 100), 2),
                "outlier_examples": [round(float(x), 2) for x in outliers.head(10).tolist()],
            })

        # technique == "zscore"
        z = np.abs(stats.zscore(data))
        outliers = data[z > 3]

        return make_serializable({
            "status": "success",
            "technique": "Z-Score (threshold: 3)",
            "column": column,
            "total_values": int(len(data)),
            "outlier_count": int(len(outliers)),
            "outlier_pct": round(float(len(outliers) / len(data) * 100), 2),
            "outlier_examples": [round(float(x), 2) for x in outliers.head(10).tolist()],
        })
    except (TypeError, ValueError) as e:
        # Non-numeric columns fail inside quantile()/zscore(); report the
        # failure in the same error-dict form the other tools use.
        return {"status": "error", "message": f"Outlier detection failed: {e}"}

# Confirmation message shown once the tool functions above have been defined.
print("✅ Statistical analysis tools defined!")

What's Hot

NYT Strands hints and solutions for Tuesday, May 12 (game #800)

OpenAI Introduces Dawn: A Cybersecurity Initiative That Places Codex Safety on the Middle of Vulnerability Detection and Patch Validation

FAQ on hantavirus and outbreak on cruise ship Hondius

OpenAI Introduces Dawn: A Cybersecurity Initiative That Places Codex Safety on the Middle of Vulnerability Detection and Patch Validation

What’s new in Android’s May 2026 Google System Updates [U]

Google says AI is being abused at industrial scale for cyberattacks, and it simply thwarted one

College students Boo Graduation Speaker After She Calls AI the ‘Subsequent Industrial Revolution’

10 GitHub Repositories to Grasp FastAPI

Constructing internet search-enabled brokers with Strands and Exa

NYT Strands hints and solutions for Tuesday, May 12 (game #800)

OpenAI Introduces Dawn: A Cybersecurity Initiative That Places Codex Safety on the Middle of Vulnerability Detection and Patch Validation

FAQ on hantavirus and outbreak on cruise ship Hondius

NYT Strands hints and solutions for Tuesday, May 12 (game #800)

OpenAI Introduces Dawn: A Cybersecurity Initiative That Places Codex Safety on the Middle of Vulnerability Detection and Patch Validation

FAQ on hantavirus and outbreak on cruise ship Hondius

Useful links

categories

What's Hot

Related Posts

Useful links

categories