Create ML Models with BigQuery ML
Task 2: Create a forecasting BigQuery machine learning model
CREATE OR REPLACE MODEL austin.model_1
OPTIONS(
input_label_cols=['duration_minutes'],
model_type='linear_reg'
)AS SELECT duration_minutes,
address AS location,
start_station_name,
CAST(EXTRACT(dayofweek FROM start_time) AS STRING) as dayofweek,
CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday,
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips
JOIN `bigquery-public-data.austin_bikeshare.bikeshare_stations` AS stations
ON trips.start_station_id = stations.station_id
WHERE EXTRACT(year from start_time) = 2018;
Task 3: Create the second machine learning model
CREATE OR REPLACE MODEL austin.model_2
OPTIONS(input_label_cols=['duration_minutes'], model_type='linear_reg')
AS SELECT duration_minutes, subscriber_type, start_station_name,
CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips
WHERE EXTRACT(year from start_time) = 2018;
Task 4: Evaluate the two machine learning models
SELECT SQRT(mean_squared_error)AS rmse, mean_absolute_error
FROM ML.EVALUATE(MODEL austin.model_1, (
SELECT duration_minutes, address AS location, start_station_name,
CAST(EXTRACT(dayofweek FROM start_time) AS STRING) as dayofweek,
CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday,
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips
JOIN `bigquery-public-data.austin_bikeshare.bikeshare_stations` AS stations
ON trips.start_station_id = stations.station_id
WHERE EXTRACT(year from start_time) = 2019)
);
SELECT SQRT(mean_squared_error)AS rmse, mean_absolute_error
FROM ML.EVALUATE(MODEL austin.model_2, (
SELECT duration_minutes, subscriber_type, start_station_name,
CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips
WHERE EXTRACT(year from start_time) = 2019)
);
Task 5: Use the subscriber type machine learning model to predict average trip durations
SELECT start_station_name,
AVG(duration_minutes) AS avg_duration,
COUNT(*) AS total_trips
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips`
WHERE EXTRACT(year FROM start_time) = 2019
GROUP BY start_station_name
ORDER BY total_trips DESC;
SELECT avg(predicted_duration_minutes),
COUNT(duration_minutes)
FROM Ml.PREDICT(MODEL austin.model_2, (
SELECT duration_minutes, subscriber_type,
start_station_name,
CAST(EXTRACT(hour FROM start_time) AS STRING) AS hourofday
FROM `bigquery-public-data.austin_bikeshare.bikeshare_trips` AS trips
WHERE EXTRACT(year from start_time)=2019
AND start_station_name = '21st & Speedway @PCL'
AND subscriber_type='Single Trip' )
);
Last updated