Steam store API Big Data study: Plotly query representation¶
Importing Plotly libraries¶
In [1]:
import $ivy.`org.plotly-scala::plotly-almond:0.7.1`
import plotly._, plotly.element._, plotly.layout._, plotly.Almond._, plotly.Trace
import scala.io.Source
// Initialise the plotly-almond integration for this notebook session.
// NOTE(review): offline = true presumably embeds plotly.js in the notebook instead of
// loading it from a CDN — confirm against the plotly-scala documentation.
init(offline=true)
// Replace the REPL pretty-printer with a copy whose defaultHeight is 3, which keeps the
// echoed value of each cell-level val short (large values are elided in the cell output).
repl.pprinter() = repl.pprinter().copy(defaultHeight = 3)
Out[1]:
import $ivy.$ import plotly._, plotly.element._, plotly.layout._, plotly.Almond._, plotly.Trace import scala.io.Source
A function for processing and filtering our generated CSVs¶
In [2]:
import scala.util.Try

/** Reads a two-column CSV file and returns its columns, filtered by numeric bounds.
  *
  * Each line is split on "," (no support for quoted fields). A row is kept when:
  *  - it has exactly two fields (malformed rows are skipped instead of raising a MatchError),
  *  - its first field is not empty, and
  *  - every field that parses as a Double lies strictly inside its (min, max) interval.
  *
  * Fields that do not parse as a number (header names, category labels, ...) are never
  * rejected by the bounds, so header rows and textual x values pass through untouched.
  *
  * @param csv_path path of the CSV file to read
  * @param x_min    exclusive lower bound for numeric values of the first column
  * @param x_max    exclusive upper bound for numeric values of the first column
  * @param y_min    exclusive lower bound for numeric values of the second column
  * @param y_max    exclusive upper bound for numeric values of the second column
  * @return a two-element array: the kept first-column values, then the kept second-column
  *         values, both as strings and in file order
  */
def processCsv(csv_path: String,
               x_min: Double = 0, x_max: Double = Double.PositiveInfinity,
               y_min: Double = 0, y_max: Double = Double.PositiveInfinity): Array[List[String]] = {
  // Materialise the lines eagerly so the file handle can be released right away.
  val source = Source.fromFile(csv_path)
  val lines = try source.getLines().toList finally source.close()

  // Non-numeric fields always pass; numeric ones must be strictly between the bounds.
  def withinBounds(field: String, min: Double, max: Double): Boolean =
    Try(field.toDouble).toOption.forall(value => value > min && value < max)

  // `collect` drops rows that do not split into exactly two fields. Note that Java's
  // split removes trailing empty fields, so a line such as "2008," yields one field only.
  val keptRows = lines.map(_.split(",")).collect {
    case Array(x, y)
        if x != "" && withinBounds(x, x_min, x_max) && withinBounds(y, y_min, y_max) =>
      (x, y)
  }

  val (firstColumn, secondColumn) = keptRows.unzip
  Array(firstColumn, secondColumn)
}
Out[2]:
import scala.util.Try defined function processCsv
Steam store API: Several distributions¶
Games released by year (2006-2024)¶
In [9]:
// Games-per-year series; y_min = 10 discards rows whose numeric count is not above 10.
val filename = "OutputCSVs/SQL/ReleaseDateCount/part-00000-1841456d-3964-4077-a48a-2a7315caa380-c000.csv"
val processedCsv = processCsv(filename, y_min = 10)
// processCsv yields Array(first column, second column); name both before plotting.
val plot = processedCsv match {
  case Array(years, gameCounts) => Seq(Scatter(years, gameCounts))
}
plot.plot(title = "Games released on Steam by year")
Out[9]:
filename: String = "OutputCSVs/SQL/ReleaseDateCount/part-00000-1841456d-3964-4077-a48a-2a7315caa380-c000.csv" processedCsv: Array[List[String]] = Array( List( ... plot: Seq[Scatter] = List( Scatter( ... res8_3: String = "plot-f6d1f277-1dde-4a58-8ff0-744d9583cc97"
Product price distribution (0-80€)¶
In [15]:
// Price histogram: numeric prices must be below 80 (and above the default lower bound of 0),
// and only rows with a numeric count above 100 are kept.
val filename = "OutputCSVs/SQL/GamePriceDistribution/part-00000-2add8743-e32f-4d58-9c26-745ee2801712-c000.csv"
val processedCsv = processCsv(filename, x_max = 80, y_min = 100)
// processCsv yields Array(first column, second column); name both before plotting.
val plot = processedCsv match {
  case Array(prices, productCounts) => Seq(Scatter(prices, productCounts))
}
plot.plot(title = "Product price distribution in €")
Out[15]:
filename: String = "OutputCSVs/SQL/GamePriceDistribution/part-00000-2add8743-e32f-4d58-9c26-745ee2801712-c000.csv" processedCsv: Array[List[String]] = Array( List( ... plot: Seq[Scatter] = List( Scatter( ... res14_3: String = "plot-3e43e28c-a4dd-4f3a-84c5-8efda9b22c0d"
Operating System compatibility (Windows, Mac & Linux)¶
In [5]:
/** Maps a CSV row of three "true"/"false" platform flags plus one trailing field to a
  * human-readable compatibility label.
  *
  * Only rows with exactly four fields whose first flag is "true" get a specific label;
  * every other input (different length, other flag combinations) yields "Other".
  *
  * @param str the split CSV row: three flags followed by one extra field that is ignored
  * @return the compatibility label for the row
  */
def opSysCompatibilityCheck(str: Array[String]): String = {
  // Lookup table keyed by the three leading flags.
  val labelsByFlags = Map(
    List("true", "true", "true")   -> "Full Compatibility",
    List("true", "true", "false")  -> "Windows & Mac",
    List("true", "false", "true")  -> "Windows & Linux",
    List("true", "false", "false") -> "Windows only"
  )

  Option(str)
    .filter(_.length == 4) // the labelled cases require exactly four fields
    .flatMap(fields => labelsByFlags.get(fields.take(3).toList))
    .getOrElse("Other")
}
// CSV rows hold three platform flags followed by a fourth field that is plotted as the y value.
val filename = "OutputCSVs/SQL/CompatibleGames/part-00000-eb2d0d09-edc8-4d3a-9046-297871852a2d-c000.csv"
// Read all lines eagerly and always release the file handle afterwards.
val lines = {
  val source = Source.fromFile(filename)
  try source.getLines().toList finally source.close()
}
// Each row becomes Array(compatibility label, fourth field).
val data = lines.map(_.split(",")).map(list => Array(opSysCompatibilityCheck(list), list(3)))
// Rows that fell into the catch-all "Other" label; kept as a cell value for inspection only.
val eljuegoraro = data.filter(x => x(0) == "Other")
// Every element of `data` has exactly two entries, so plain indexing is safe here.
val sequenceFromFirstElements = data.map(row => row(0))
val sequenceFromSecondElements = data.map(row => row(1))
val plot = Seq(
  Scatter(sequenceFromFirstElements, sequenceFromSecondElements)
)
plot.plot(title = "Operative System Compatibility (Windows, Linux & Mac)")
Out[5]:
defined function opSysCompatibilityCheck filename: String = "OutputCSVs/SQL/CompatibleGames/part-00000-eb2d0d09-edc8-4d3a-9046-297871852a2d-c000.csv" lines: List[String] = List( "true,false,false,100028", ... data: List[Array[String]] = List( Array("Windows only", "100028"), ... eljuegoraro: List[Array[String]] = List( Array("Other", "32"), ... sequenceFromFirstElements: List[String] = List( "Windows only", ... sequenceFromSecondElements: List[String] = List( "100028", ... plot: Seq[Scatter] = List( Scatter( ... res4_8: String = "plot-bf0dd523-a46f-43d4-bef1-3df32bb12710"
Product type distribution¶
In [6]:
// Product-type counts; only processCsv's default bounds apply (numeric values must be above 0).
val filename = "OutputCSVs/SQL/ProductTypeCount/part-00000-15c0f0bc-7f65-4fad-9f3f-ba83bc999044-c000.csv"
val processedCsv = processCsv(filename)
// processCsv yields Array(first column, second column); name both before plotting.
val plot = processedCsv match {
  case Array(productTypes, productCounts) => Seq(Bar(productTypes, productCounts))
}
plot.plot(title = "Product type distribution")
Out[6]:
filename: String = "OutputCSVs/SQL/ProductTypeCount/part-00000-15c0f0bc-7f65-4fad-9f3f-ba83bc999044-c000.csv" processedCsv: Array[List[String]] = Array( List( ... plot: Seq[Bar] = List( Bar( ... res5_3: String = "plot-ac4647f6-7fbd-4b2f-b71b-ed74b88e4f35"
Top Steam Categories¶
In [186]:
// Category counts; y_min = 1000 keeps only rows whose numeric count is above 1000.
val filename = "OutputCSVs/SQL/CategoryCount/part-00000-76c40d04-1e42-4031-a148-c48a297c8a8f-c000.csv"
val processedCsv = processCsv(filename, y_min = 1000)
// processCsv yields Array(first column, second column); name both before plotting.
val plot = processedCsv match {
  case Array(categories, categoryCounts) => Seq(Bar(categories, categoryCounts))
}
plot.plot(title = "Top Steam Categories")
Out[186]:
filename: String = "OutputCSVs/SQL/CategoryCount/part-00000-76c40d04-1e42-4031-a148-c48a297c8a8f-c000.csv" processedCsv: Array[List[String]] = Array( List( ... plot: Seq[Bar] = List( Bar( ... res185_3: String = "plot-a85dc14e-bc5b-4b02-a409-d594a6940ccb"
Top Steam Genres¶
In [188]:
// Genre counts; y_min = 300 keeps only rows whose numeric count is above 300.
val filename = "OutputCSVs/SQL/GenreCount/part-00000-4c2d0d06-78d5-4435-b871-7bf74ee8679f-c000.csv"
val processedCsv = processCsv(filename, y_min = 300)
// processCsv yields Array(first column, second column); name both before plotting.
val plot = processedCsv match {
  case Array(genres, genreCounts) => Seq(Bar(genres, genreCounts))
}
plot.plot(title = "Top Steam Genres")
Out[188]:
filename: String = "OutputCSVs/SQL/GenreCount/part-00000-4c2d0d06-78d5-4435-b871-7bf74ee8679f-c000.csv" processedCsv: Array[List[String]] = Array( List( ... plot: Seq[Bar] = List( Bar( ... res187_3: String = "plot-e0b305b7-f53a-4887-afac-563252f5bc32"
Top Steam Categories and Genres: Price calculations¶
Category price Average, Median and Mode¶
In [180]:
// One CSV per statistic, all read with processCsv's default bounds.
val filenameAvg = "OutputCSVs/SQL/CategoryPriceAvg/part-00000-eeee418a-e74c-4e14-8110-4f1d8dfd6e24-c000.csv"
val filenameMedian = "OutputCSVs/SQL/CategoryPriceMedian/part-00000-a9fe33f9-1de3-40d1-a4b7-21bbe010eff7-c000.csv"
val filenameMode = "OutputCSVs/SQL/CategoryPriceMostFrequent/part-00000-12d576c7-fbfa-446a-8c5e-626246ed5727-c000.csv"
// Order matters: it must line up with the trace labels zipped in below.
val processedCsvs = List(filenameAvg, filenameMedian, filenameMode).map(path => processCsv(path))
// One bar trace per statistic, each built from that file's two columns.
val plot = Seq("Average", "Median", "Mode").zip(processedCsvs).map {
  case (label, Array(categories, prices)) => Bar(categories, prices, name = label)
}
plot.plot(title = "Category price Average, Median and Mode in €")
Out[180]:
filenameAvg: String = "OutputCSVs/SQL/CategoryPriceAvg/part-00000-eeee418a-e74c-4e14-8110-4f1d8dfd6e24-c000.csv" filenameMedian: String = "OutputCSVs/SQL/CategoryPriceMedian/part-00000-a9fe33f9-1de3-40d1-a4b7-21bbe010eff7-c000.csv" filenameMode: String = "OutputCSVs/SQL/CategoryPriceMostFrequent/part-00000-12d576c7-fbfa-446a-8c5e-626246ed5727-c000.csv" processedCsvs: List[Array[List[String]]] = List( Array( ... plot: Seq[Bar] = List( Bar( ... res179_5: String = "plot-47825b91-d687-44df-92f3-4b80137f82f3"
Genre price Average, Median and Mode¶
In [189]:
// One CSV per statistic, all read with processCsv's default bounds.
val filenameAvg = "OutputCSVs/SQL/GenrePriceAvg/part-00000-9f5fd11b-f0fa-4fd1-b03c-542551670741-c000.csv"
val filenameMedian = "OutputCSVs/SQL/GenrePriceMedian/part-00000-7355f51e-cd7b-46f8-b81e-a4f642ae4347-c000.csv"
val filenameMode = "OutputCSVs/SQL/GenrePriceMostFrequent/part-00000-bee1b851-813f-4862-9e0f-65aab85a5110-c000.csv"
// Order matters: it must line up with the trace labels zipped in below.
val processedCsvs = List(filenameAvg, filenameMedian, filenameMode).map(path => processCsv(path))
// One bar trace per statistic, each built from that file's two columns.
val plot = Seq("Average", "Median", "Mode").zip(processedCsvs).map {
  case (label, Array(genres, prices)) => Bar(genres, prices, name = label)
}
plot.plot(title = "Genre price Average, Median and Mode in €")
Out[189]:
filenameAvg: String = "OutputCSVs/SQL/GenrePriceAvg/part-00000-9f5fd11b-f0fa-4fd1-b03c-542551670741-c000.csv" filenameMedian: String = "OutputCSVs/SQL/GenrePriceMedian/part-00000-7355f51e-cd7b-46f8-b81e-a4f642ae4347-c000.csv" filenameMode: String = "OutputCSVs/SQL/GenrePriceMostFrequent/part-00000-bee1b851-813f-4862-9e0f-65aab85a5110-c000.csv" processedCsvs: List[Array[List[String]]] = List( Array( ... plot: Seq[Bar] = List( Bar( ... res188_5: String = "plot-90cf3371-682e-4a98-93a1-cd49eb08ed23"
In [ ]: