%useLatestDescriptors
%use dataframe
%use lets-plot

// Load the Nobel dataset (CSV hosted in the lets-plot-docs repository) and preview its first 3 rows.
val nobelCsvUrl = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/nobel.csv"
val df = DataFrame.readCSV(nobelCsvUrl)
df.head(3)

// Column-name -> column-values view of the data frame, so rows can be addressed by index.
val raw = df.toMap()
// Row count, taken from the length of the first column.
val n = raw.values.first().size

// "<firstname> <surname>" for every row; used later as the de-duplication key.
// The two columns are looked up once (not once per row), and a missing column
// fails with a descriptive message instead of a bare NullPointerException.
val fullnames: List<String> = run {
    val firstnames = requireNotNull(raw["firstname"]) { "Column 'firstname' not found in the dataset" }
    val surnames = requireNotNull(raw["surname"]) { "Column 'surname' not found in the dataset" }
    List(n) { i -> "${firstnames[i]} ${surnames[i]}" }
}

// Country-code columns for place of birth and place of death; entries may be null.
val bornCodes: List<*> =
    requireNotNull(raw["born_country_code"]) { "Column 'born_country_code' not found in the dataset" }
val diedCodes: List<*> =
    requireNotNull(raw["died_country_code"]) { "Column 'died_country_code' not found in the dataset" }

/**
 * Selects rows of [raw] and returns them as a column map.
 *
 * A row index is kept when, checked in this order:
 *  1. both its born and died country codes are non-null,
 *  2. [keepRow] returns `true` for the index,
 *  3. its full name has not already been kept (the first occurrence wins).
 *
 * @param keepRow predicate over a row index; only invoked for rows that pass check 1.
 * @return every column of [raw] plus a `"fullname"` column, each restricted to the kept rows
 *         in their original order.
 */
fun filterAndDedup(keepRow: (Int) -> Boolean): Map<String, List<*>> {
    val namesAlreadyKept = mutableSetOf<String>()
    val keptRows = mutableListOf<Int>()

    for (row in 0 until n) {
        if (bornCodes[row] == null || diedCodes[row] == null) continue
        if (!keepRow(row)) continue
        // add() returns false for a name that was recorded earlier, i.e. a duplicate.
        if (namesAlreadyKept.add(fullnames[row])) {
            keptRows += row
        }
    }

    val columns = raw + mapOf("fullname" to fullnames)
    return columns.mapValues { column ->
        val values = column.value as List<*>
        keptRows.map { row -> values[row] }
    }
}

// Laureates whose birth and death country codes are equal ...
val notMigrated = filterAndDedup { row -> bornCodes[row] == diedCodes[row] }
// ... and those whose codes differ.
val migrated = filterAndDedup { row -> bornCodes[row] != diedCodes[row] }

// All columns of a filtered table have the same length, so the "fullname" column gives the row count.
println("Non-migrated: ${notMigrated.getValue("fullname").size}, Migrated: ${migrated.getValue("fullname").size}")

Non-migrated: 424, Migrated: 189

/**
 * Builds a bar chart over one country-code column of [data].
 *
 * Bars are placed on a discrete x axis taken from [countryCol], ordered by `..count..`,
 * and filled by the same column. The layer uses `samplingPick(10)`; per the notebook's
 * own comments this is what limits each chart to 10 countries.
 *
 * @param data column-name -> values map used as the plot data
 * @param countryCol name of the column mapped to both `x` and `fill`
 * @param title plot title
 * @return the assembled plot, with a two-row legend placed at the bottom
 */
fun buildPlot(data: Map<String, List<*>>, countryCol: String, title: String) = run {
    val bars = geomBar(sampling = samplingPick(10), color = "pen", size = 0.3) {
        x = asDiscrete(countryCol, orderBy = "..count..")
        fill = countryCol
    }

    val legend = guides(fill = guideLegend(nrow = 2, byRow = true, color = "white"))

    // Hide x-axis labels, ticks, the plot message and the legend title.
    val appearance = theme(
        axisTextX = elementBlank(),
        axisTicks = elementBlank(),
        plotMessage = elementBlank(),
        legendTitle = elementBlank(),
        plotTitle = elementText(hjust = 1.0)
    ).legendPositionBottom()

    letsPlot(data) +
        bars +
        ggtitle(title) +
        labs(x = "country", fill = "country") +
        legend +
        appearance
}

// Three bar charts, each showing the top 10 countries by number of:
//   1. non-migrated laureates (born and died in the same country),
//   2. immigrated laureates (counted by country of death),
//   3. emigrated laureates (counted by country of birth).
val plots = listOf(
    Triple(notMigrated, "born_country_code", "by non migrated laureates"),
    Triple(migrated, "died_country_code", "by immigrated laureates"),
    Triple(migrated, "born_country_code", "by emigrated laureates")
).map { (table, countryCol, title) -> buildPlot(table, countryCol, title) }

// Give every chart its own 'Paired' Brewer fill scale.
val plots1 = plots.map { plot -> plot + scaleFillBrewer(palette = "Paired") }

// Lay the charts out side by side under a common, centered, bold title.
gggrid(plots1, ncol = 3) +
    ggtitle("Top 10 Countries") +
    theme(plotTitle = elementText(face = "bold", hjust = 0.5))

// Collect the distinct country codes, sorted, that occur in the born/died columns of both tables.
val allCountryCodes = listOf(notMigrated, migrated)
    .flatMap { table ->
        listOf("born_country_code", "died_country_code").flatMap { column -> table[column] as List<*> }
    }
    .mapNotNull { code -> code?.toString() }
    .toSortedSet()
    .toList()

println(allCountryCodes.size)

73

// Unfortunately, 73 unique colors is more than categorical palettes can handle effectively.
// Let's limit this to only countries that appear in the top 10 of any chart.

/**
 * Returns the (up to) 10 most frequent non-null values of column [col] in [data], as strings.
 *
 * Values are ranked by descending number of occurrences; equal counts are ordered by the
 * value's string form, ascending.
 *
 * @param data column-name -> values map
 * @param col name of the column to count; must be present in [data]
 * @return at most 10 values, most frequent first
 */
fun top10(data: Map<String, List<*>>, col: String): List<String> {
    // Insertion-ordered tally of how often each non-null value occurs.
    val tally = LinkedHashMap<Any, Int>()
    for (cell in data[col] as List<*>) {
        if (cell == null) continue
        tally[cell] = (tally[cell] ?: 0) + 1
    }

    val ranking = compareBy<Map.Entry<Any, Int>>({ -it.value }, { it.key.toString() })
    return tally.entries
        .sortedWith(ranking)
        .take(10)
        .map { entry -> entry.key.toString() }
}

// Top-10 country lists matching the three charts.
val top10NotMigrated = top10(notMigrated, "born_country_code")
val top10MigratedBorn = top10(migrated, "born_country_code")
val top10MigratedDied = top10(migrated, "died_country_code")

// Union of the three lists: every country that appears in at least one chart, sorted.
val uniqueCountries = listOf(top10NotMigrated, top10MigratedBorn, top10MigratedDied)
    .flatten()
    .toSortedSet()
    .toList()
println(uniqueCountries)
println("${uniqueCountries.size} unique countries")

[AT, CA, CH, DE, DK, ES, FR, GB, HU, IE, IL, IT, NL, PL, RU, SE, US]
17 unique countries

// 17 countries still exceeds a single Brewer palette.
// Combine two palettes: 10 colors from 'Paired' + 7 from 'Pastel1'.
val palettePaired = scaleColorBrewer(palette = "Paired").palette(10)
val palettePastel = scaleColorBrewer(palette = "Pastel1").palette(7)
val countryColors = palettePaired + palettePastel

// zip() stops at the shorter of its two inputs, so if the data ever yields more countries
// than the palette sizes hard-coded above, the extra countries would silently get no color.
// Fail loudly instead.
check(countryColors.size >= uniqueCountries.size) {
    "Need ${uniqueCountries.size} colors but the combined palette has only ${countryColors.size}"
}

// Create a manual fill scale mapping each country to its color, then apply it to all plots
// so that a given country has the same color in every chart.
val manualScale = scaleFillManual(values = uniqueCountries.zip(countryColors).toMap())

val plots2 = plots.map { it + manualScale }

// guides = "collect" is passed so the charts' legends are gathered by the grid.
gggrid(plots2, ncol = 3, guides = "collect") +
    ggtitle("Top 10 Countries") +
    theme(plotTitle = elementText(face = "bold", hjust = 0.5))

firstname	surname	born_country_code	died_country_code	gender	year	category	share	name_of_university	city_of_university	country_of_university	born_month	age	age_get_prize
Wilhelm Conrad	Röntgen	DE	DE	male	1901	physics	1	Munich University	Munich	Germany	Mar	78	56
Hendrik A.	Lorentz	NL	NL	male	1902	physics	2	Leiden University	Leiden	the Netherlands	Jul	75	49
Pieter	Zeeman	NL	NL	male	1902	physics	2	Amsterdam University	Amsterdam	the Netherlands	May	78	37

Generating Color Palettes with `scale.palette()`

Problem: Independent Color Scales

Solution: Using a Shared Palette

Generating Color Palettes with `scale.palette()`

Problem: Independent Color Scales

Solution: Using a Shared Palette

Generating Color Palettes with `scale.palette()`