hongbo-miao/hongbomiao.com

View on GitHub
data-processing/hm-spark/applications/find-taxi-top-routes/src/utils/zone.py

Summary

Maintainability
A
0 mins
Test Coverage
from pyspark.sql import DataFrame, SparkSession


def load_zones(spark: SparkSession, zone_data_path: str):
    return (
        spark.read.format("csv")
        .option("inferSchema", True)
        .option("header", True)
        .load(zone_data_path)
    )


def preprocess_zones(df: DataFrame) -> DataFrame:
    column_names = list(map(lambda x: x.lower(), df.columns))
    df = df.toDF(*column_names)
    return df.drop("objectid")