Create ML Models with BigQuery ML

Task 2: Create a forecasting BigQuery machine learning model

CREATE OR REPLACE MODEL austin.model_1
OPTIONS(
    input_label_cols=['duration_minutes'], 
    model_type='linear_reg'
)AS SELECT duration_minutes, 
    address AS location, 
    start_station_name,
    CAST(EXTRACT(dayofweek FROM start_time) AS STRING) as dayofweek, 
    CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday, 
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips 
JOIN `bigquery-public-data.austin_bikeshare.bikeshare_stations` AS stations 
ON trips.start_station_id = stations.station_id 
WHERE EXTRACT(year from start_time) = 2018;

Task 3: Create the second machine learning model

CREATE OR REPLACE MODEL austin.model_2 
OPTIONS(input_label_cols=['duration_minutes'], model_type='linear_reg') 
AS SELECT duration_minutes, subscriber_type, start_station_name, 
CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday 
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips 
WHERE EXTRACT(year from start_time) = 2018;

Task 4: Evaluate the two machine learning models

SELECT SQRT(mean_squared_error)AS rmse, mean_absolute_error 
FROM ML.EVALUATE(MODEL austin.model_1, (
    SELECT duration_minutes, address AS location, start_station_name, 
    CAST(EXTRACT(dayofweek FROM start_time) AS STRING) as dayofweek, 
    CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday, 
    FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips 
    JOIN `bigquery-public-data.austin_bikeshare.bikeshare_stations` AS stations 
    ON trips.start_station_id = stations.station_id 
    WHERE EXTRACT(year from start_time) = 2019)
);
SELECT SQRT(mean_squared_error)AS rmse, mean_absolute_error 
FROM ML.EVALUATE(MODEL austin.model_2, (
    SELECT duration_minutes, subscriber_type, start_station_name, 
    CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday 
    FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips 
    WHERE EXTRACT(year from start_time) = 2019)
);

Task 5: Use the subscriber type machine learning model to predict average trip durations

SELECT start_station_name, 
    AVG(duration_minutes) AS avg_duration, 
    COUNT(*) AS total_trips 
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` 
WHERE EXTRACT(year FROM start_time) = 2019 
GROUP BY start_station_name 
ORDER BY total_trips DESC;
SELECT avg(predicted_duration_minutes), 
    COUNT(duration_minutes) 
FROM Ml.PREDICT(MODEL austin.model_2, (
    SELECT duration_minutes, subscriber_type, 
        start_station_name,  
        CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday 
    FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips 
    WHERE EXTRACT(year from start_time)=2019 
    AND start_station_name = '21st & Speedway @PCL' 
    AND subscriber_type='Single Trip' )
);

Last updated