Steam store API Big Data study: Plotly query representation¶


Importing Plotly libraries¶


In [1]:
import $ivy.`org.plotly-scala::plotly-almond:0.7.1`
import plotly._, plotly.element._, plotly.layout._, plotly.Almond._, plotly.Trace
import scala.io.Source

// Initialise the plotly-almond notebook integration.
// NOTE(review): offline = true presumably bundles plotly.js into the notebook instead of
// loading it from a CDN — confirm against the plotly-scala documentation.
init(offline=true)
// Swap the REPL pretty-printer for a copy whose defaultHeight is 3, so the values echoed
// in the Out[] cells are cut short instead of being printed in full.
repl.pprinter() = repl.pprinter().copy(defaultHeight = 3)
Out[1]:
import $ivy.$                                      

import plotly._, plotly.element._, plotly.layout._, plotly.Almond._, plotly.Trace

import scala.io.Source

A function for processing and filtering our generated CSVs¶


In [2]:
import scala.util.Try

/** Reads a two-column CSV file and returns its two columns, filtered by numeric bounds.
  *
  * Every line is split on "," (quoting and escaping are not handled). A row is kept when
  *   - it has exactly two fields,
  *   - its first field is not empty, and
  *   - each field that parses as a Double lies strictly inside its bounds; fields that
  *     are not numeric (for example category names) are never range-checked.
  *
  * Rows that do not have exactly two fields are skipped, so a single malformed line no
  * longer aborts the whole call with a MatchError or ArrayIndexOutOfBoundsException.
  * The file is closed before the function returns.
  *
  * @param csv_path path of the CSV file to read
  * @param x_min    exclusive lower bound for numeric values of the first column
  * @param x_max    exclusive upper bound for numeric values of the first column
  * @param y_min    exclusive lower bound for numeric values of the second column
  * @param y_max    exclusive upper bound for numeric values of the second column
  * @return a two-element Array: index 0 holds the kept first-column values and index 1
  *         the matching second-column values, both as unparsed Strings in file order
  */
def processCsv(csv_path: String, 
               x_min: Double = 0, x_max: Double = Double.PositiveInfinity, 
               y_min: Double = 0, y_max: Double = Double.PositiveInfinity): Array[List[String]] = {

    // Materialise the lines eagerly so the file handle can be released right away.
    val source = Source.fromFile(csv_path)
    val lines = try source.getLines().toList finally source.close()

    // A value that does not parse as a number always passes the range check.
    def withinBounds(raw: String, min: Double, max: Double): Boolean =
        Try(raw.toDouble).toOption.forall(value => value > min && value < max)

    // Filter and project in one pass; the pattern itself rejects rows of the wrong width.
    val (sequenceFromFirstElements, sequenceFromSecondElements) = lines
        .map(_.split(","))
        .collect {
            case Array(x, y) if x != "" && withinBounds(x, x_min, x_max) && withinBounds(y, y_min, y_max) =>
                (x, y)
        }
        .unzip

    Array(sequenceFromFirstElements, sequenceFromSecondElements)
}
Out[2]:
import scala.util.Try


defined function processCsv

Steam store API: Several distributions¶


Games released by year (2006-2024)¶


In [9]:
// CSV with the release-date counts (presumably one "year,count" row per year — confirm).
val filename = "OutputCSVs/SQL/ReleaseDateCount/part-00000-1841456d-3964-4077-a48a-2a7315caa380-c000.csv"
// Keep only rows whose numeric second column is strictly greater than 10; the other
// processCsv bounds stay at their defaults.
val processedCsv = processCsv(filename, y_min = 10)

// processedCsv(0) is the first CSV column (x axis), processedCsv(1) the second (y axis).
val plot = Seq(
    Scatter(processedCsv(0), processedCsv(1))
)

// Render the figure in the notebook.
plot.plot(title = "Games released on Steam by year")
Out[9]:
filename: String = "OutputCSVs/SQL/ReleaseDateCount/part-00000-1841456d-3964-4077-a48a-2a7315caa380-c000.csv"
processedCsv: Array[List[String]] = Array(
  List(
...
plot: Seq[Scatter] = List(
  Scatter(
...
res8_3: String = "plot-f6d1f277-1dde-4a58-8ff0-744d9583cc97"

Product price distribution (0-80€)¶


In [15]:
// CSV with the price distribution (presumably "price,count" rows — confirm).
val filename = "OutputCSVs/SQL/GamePriceDistribution/part-00000-2add8743-e32f-4d58-9c26-745ee2801712-c000.csv"
// Keep rows whose first column is strictly below 80 and whose second column is strictly
// above 100.
// NOTE(review): the default x_min = 0 is exclusive as well, so a row with a price of
// exactly 0 is dropped — confirm that matches the "0-80€" section title.
val processedCsv = processCsv(filename, x_max = 80, y_min = 100)

// processedCsv(0) is the first CSV column (x axis), processedCsv(1) the second (y axis).
val plot = Seq(
    Scatter(processedCsv(0), processedCsv(1))
)

// Render the figure in the notebook.
plot.plot(title = "Product price distribution in €")
Out[15]:
filename: String = "OutputCSVs/SQL/GamePriceDistribution/part-00000-2add8743-e32f-4d58-9c26-745ee2801712-c000.csv"
processedCsv: Array[List[String]] = Array(
  List(
...
plot: Seq[Scatter] = List(
  Scatter(
...
res14_3: String = "plot-3e43e28c-a4dd-4f3a-84c5-8efda9b22c0d"

Operating System compatibility (Windows, Mac & Linux)¶


In [5]:
/** Turns one CSV row of operating-system flags into a compatibility label.
  *
  * The row must have exactly four fields. The first three are compared against the
  * literal "true"/"false" and, judging by the labels, stand for Windows, Mac and Linux
  * in that order; the fourth field is ignored here.
  *
  * @param str the fields of one CSV row
  * @return "Full Compatibility", "Windows & Mac", "Windows & Linux" or "Windows only"
  *         when the first flag is "true" and the other two are "true"/"false";
  *         "Other" for every other row, including rows of a different width
  */
def opSysCompatibilityCheck(str: Array[String]): String = str match {
    // Only four-field rows whose first flag is "true" can receive a specific label.
    case Array("true", secondFlag, thirdFlag, _) =>
        (secondFlag, thirdFlag) match {
            case ("true", "true") => "Full Compatibility"
            case ("true", "false") => "Windows & Mac"
            case ("false", "true") => "Windows & Linux"
            case ("false", "false") => "Windows only"
            case _ => "Other"
        }
    case _ => "Other"
}
val filename = "OutputCSVs/SQL/CompatibleGames/part-00000-eb2d0d09-edc8-4d3a-9046-297871852a2d-c000.csv"

// Read the file eagerly and close it afterwards; a bare Source.fromFile(...) would keep
// the file handle open for as long as the kernel is running.
val lines = {
    val source = Source.fromFile(filename)
    try source.getLines().toList finally source.close()
}
// Each row becomes Array(compatibility label, fourth CSV field). The fourth field is
// presumably the number of games with that flag combination — confirm.
val data = lines.map(_.split(",")).map(list => Array(opSysCompatibilityCheck(list), list(3)))

// Rows labelled "Other", kept only so they can be inspected in the cell output.
val eljuegoraro = data.filter(x => x(0) == "Other")

// Every element of data is built above with exactly two entries, so positional access is safe.
val sequenceFromFirstElements = data.map(row => row(0))

val sequenceFromSecondElements = data.map(row => row(1))

val plot = Seq(
    Scatter(sequenceFromFirstElements, sequenceFromSecondElements)
)

// Render the figure in the notebook.
plot.plot(title = "Operative System Compatibility (Windows, Linux & Mac)")
Out[5]:
defined function opSysCompatibilityCheck
filename: String = "OutputCSVs/SQL/CompatibleGames/part-00000-eb2d0d09-edc8-4d3a-9046-297871852a2d-c000.csv"
lines: List[String] = List(
  "true,false,false,100028",
...
data: List[Array[String]] = List(
  Array("Windows only", "100028"),
...
eljuegoraro: List[Array[String]] = List(
  Array("Other", "32"),
...
sequenceFromFirstElements: List[String] = List(
  "Windows only",
...
sequenceFromSecondElements: List[String] = List(
  "100028",
...
plot: Seq[Scatter] = List(
  Scatter(
...
res4_8: String = "plot-bf0dd523-a46f-43d4-bef1-3df32bb12710"

Product type distribution¶


In [6]:
// CSV with the product-type counts (presumably "type,count" rows — confirm).
val filename = "OutputCSVs/SQL/ProductTypeCount/part-00000-15c0f0bc-7f65-4fad-9f3f-ba83bc999044-c000.csv"
// Default bounds only: numeric values must be strictly greater than 0; non-numeric
// fields are not range-checked by processCsv.
val processedCsv = processCsv(filename)

// processedCsv(0) is the first CSV column (x axis), processedCsv(1) the second (y axis).
val plot = Seq(
    Bar(processedCsv(0), processedCsv(1))
)

// Render the figure in the notebook.
plot.plot(title = "Product type distribution")
Out[6]:
filename: String = "OutputCSVs/SQL/ProductTypeCount/part-00000-15c0f0bc-7f65-4fad-9f3f-ba83bc999044-c000.csv"
processedCsv: Array[List[String]] = Array(
  List(
...
plot: Seq[Bar] = List(
  Bar(
...
res5_3: String = "plot-ac4647f6-7fbd-4b2f-b71b-ed74b88e4f35"

Top Steam Categories¶


In [186]:
// CSV with the category counts (presumably "category,count" rows — confirm).
val filename = "OutputCSVs/SQL/CategoryCount/part-00000-76c40d04-1e42-4031-a148-c48a297c8a8f-c000.csv"
// Keep only rows whose numeric second column is strictly greater than 1000.
val processedCsv = processCsv(filename, y_min = 1000)

// processedCsv(0) is the first CSV column (x axis), processedCsv(1) the second (y axis).
val plot = Seq(
    Bar(processedCsv(0), processedCsv(1))
)

// Render the figure in the notebook.
plot.plot(title = "Top Steam Categories")
Out[186]:
filename: String = "OutputCSVs/SQL/CategoryCount/part-00000-76c40d04-1e42-4031-a148-c48a297c8a8f-c000.csv"
processedCsv: Array[List[String]] = Array(
  List(
...
plot: Seq[Bar] = List(
  Bar(
...
res185_3: String = "plot-a85dc14e-bc5b-4b02-a409-d594a6940ccb"

Top Steam Genres¶


In [188]:
// CSV with the genre counts (presumably "genre,count" rows — confirm).
val filename = "OutputCSVs/SQL/GenreCount/part-00000-4c2d0d06-78d5-4435-b871-7bf74ee8679f-c000.csv"
// Keep only rows whose numeric second column is strictly greater than 300.
val processedCsv = processCsv(filename, y_min = 300)

// processedCsv(0) is the first CSV column (x axis), processedCsv(1) the second (y axis).
val plot = Seq(
    Bar(processedCsv(0), processedCsv(1))
)

// Render the figure in the notebook.
plot.plot(title = "Top Steam Genres")
Out[188]:
filename: String = "OutputCSVs/SQL/GenreCount/part-00000-4c2d0d06-78d5-4435-b871-7bf74ee8679f-c000.csv"
processedCsv: Array[List[String]] = Array(
  List(
...
plot: Seq[Bar] = List(
  Bar(
...
res187_3: String = "plot-e0b305b7-f53a-4887-afac-563252f5bc32"

Top Steam Categories and Genres: Price calculations¶


Category price Average, Median and Mode¶


In [180]:
// One CSV per statistic (presumably "category,price" rows — confirm).
val filenameAvg = "OutputCSVs/SQL/CategoryPriceAvg/part-00000-eeee418a-e74c-4e14-8110-4f1d8dfd6e24-c000.csv"
val filenameMedian = "OutputCSVs/SQL/CategoryPriceMedian/part-00000-a9fe33f9-1de3-40d1-a4b7-21bbe010eff7-c000.csv"
val filenameMode = "OutputCSVs/SQL/CategoryPriceMostFrequent/part-00000-12d576c7-fbfa-446a-8c5e-626246ed5727-c000.csv"

// Index 0 = average, 1 = median, 2 = mode; each entry is Array(first column, second column).
// NOTE(review): processCsv's default y_min = 0 is exclusive, so a row whose value is
// exactly 0 is dropped from the chart — confirm that is intended.
val processedCsvs = List(processCsv(filenameAvg),
                         processCsv(filenameMedian),
                         processCsv(filenameMode))

// One Bar trace per statistic, all placed in the same figure.
val plot = Seq(
    Bar(processedCsvs(0)(0), processedCsvs(0)(1), name = "Average"),
    Bar(processedCsvs(1)(0), processedCsvs(1)(1), name = "Median"),
    Bar(processedCsvs(2)(0), processedCsvs(2)(1), name = "Mode")
)

// Render the figure in the notebook.
plot.plot(title = "Category price Average, Median and Mode in €")
Out[180]:
filenameAvg: String = "OutputCSVs/SQL/CategoryPriceAvg/part-00000-eeee418a-e74c-4e14-8110-4f1d8dfd6e24-c000.csv"
filenameMedian: String = "OutputCSVs/SQL/CategoryPriceMedian/part-00000-a9fe33f9-1de3-40d1-a4b7-21bbe010eff7-c000.csv"
filenameMode: String = "OutputCSVs/SQL/CategoryPriceMostFrequent/part-00000-12d576c7-fbfa-446a-8c5e-626246ed5727-c000.csv"
processedCsvs: List[Array[List[String]]] = List(
  Array(
...
plot: Seq[Bar] = List(
  Bar(
...
res179_5: String = "plot-47825b91-d687-44df-92f3-4b80137f82f3"

Genre price Average, Median and Mode¶


In [189]:
// One CSV per statistic (presumably "genre,price" rows — confirm).
val filenameAvg = "OutputCSVs/SQL/GenrePriceAvg/part-00000-9f5fd11b-f0fa-4fd1-b03c-542551670741-c000.csv"
val filenameMedian = "OutputCSVs/SQL/GenrePriceMedian/part-00000-7355f51e-cd7b-46f8-b81e-a4f642ae4347-c000.csv"
val filenameMode = "OutputCSVs/SQL/GenrePriceMostFrequent/part-00000-bee1b851-813f-4862-9e0f-65aab85a5110-c000.csv"

// Index 0 = average, 1 = median, 2 = mode; each entry is Array(first column, second column).
// NOTE(review): processCsv's default y_min = 0 is exclusive, so a row whose value is
// exactly 0 is dropped from the chart — confirm that is intended.
val processedCsvs = List(processCsv(filenameAvg),
                         processCsv(filenameMedian),
                         processCsv(filenameMode))

// One Bar trace per statistic, all placed in the same figure.
val plot = Seq(
    Bar(processedCsvs(0)(0), processedCsvs(0)(1), name = "Average"),
    Bar(processedCsvs(1)(0), processedCsvs(1)(1), name = "Median"),
    Bar(processedCsvs(2)(0), processedCsvs(2)(1), name = "Mode")
)

// Render the figure in the notebook.
plot.plot(title = "Genre price Average, Median and Mode in €")
Out[189]:
filenameAvg: String = "OutputCSVs/SQL/GenrePriceAvg/part-00000-9f5fd11b-f0fa-4fd1-b03c-542551670741-c000.csv"
filenameMedian: String = "OutputCSVs/SQL/GenrePriceMedian/part-00000-7355f51e-cd7b-46f8-b81e-a4f642ae4347-c000.csv"
filenameMode: String = "OutputCSVs/SQL/GenrePriceMostFrequent/part-00000-bee1b851-813f-4862-9e0f-65aab85a5110-c000.csv"
processedCsvs: List[Array[List[String]]] = List(
  Array(
...
plot: Seq[Bar] = List(
  Bar(
...
res188_5: String = "plot-90cf3371-682e-4a98-93a1-cd49eb08ed23"
In [ ]: