Selecting representative tokens in R
25 Feb 2017 | all notes
# Load the per-word data; strings stay plain character vectors.
alldata <- read.csv("Exp1CompRRVals.csv", stringsAsFactors = FALSE)
# The last eight columns hold the RR values, one column per meaning.
meaning.columns <- seq(ncol(alldata) - 7, ncol(alldata))
# Meaning names are the lower-cased headers of those columns.
meanings <- tolower(names(alldata)[meaning.columns])
names(alldata)[meaning.columns] <- meanings
# Number of rows selecttop() picks per cell by default.
cellsize <- 25
# If TRUE, selecttop() takes at most one word per Glotto family in a cell.
onewordperarea <- TRUE
# Accumulator for the selected rows; filled by the per-meaning loop below.
cells <- NULL
# Select the first n matching and the first n non-matching rows for one
# target meaning.
#
# Arguments:
#   ordereddata  data frame whose row order expresses preference (earlier rows
#                are picked first). Uses the columns Meaning, Word, Length and
#                (if onewordperarea is set) Glotto, plus the RR columns found
#                at the positions in the global `meaning.columns`.
#   meaning      name of the target meaning, i.e. of one of the RR columns
#   n            number of rows to pick for each of the two groups
#
# Reads the globals `onewordperarea` and `meaning.columns`.
#
# Value: a data frame with the matching rows followed by the non-matching
# rows. The columns TargetMeaning and Matching are prepended, the target
# meaning's RR column is renamed to TargetMeaningRR and all other RR columns
# are dropped.
#
# Stops with an error if `meaning` is not a column, if no row has that meaning,
# or (with onewordperarea) if fewer than n suitable rows exist in a group.
selecttop <- function(ordereddata, meaning, n=cellsize) {
  stopifnot(is.numeric(n), length(n) == 1, n >= 1)
  thismeaningcolumn <- match(meaning, colnames(ordereddata))
  if (is.na(thismeaningcolumn)) {
    stop("no column named '", meaning, "' in the data", call. = FALSE)
  }
  colnames(ordereddata)[thismeaningcolumn] <- "TargetMeaningRR"

  # split matching and non-matching words into two data sets
  correct <- subset(ordereddata, Meaning == meaning)
  correctwords <- unique(correct$Word)
  if (length(correctwords) == 0) {
    stop("no words with meaning '", meaning, "' in the data", call. = FALSE)
  }
  # only consider incorrect words which are of comparable length
  correctwordlength <- range(nchar(correctwords))
  incorrect <- subset(ordereddata, Meaning != meaning &
    # only consider phonetic words which are incorrect in all languages
    !(Word %in% correctwords) &
    Length >= correctwordlength[1] &
    Length <= correctwordlength[2])

  # Walk down `candidates` and collect the row indices of the first n rows
  # whose Word and Glotto family both differ from everything picked so far.
  # Taking the first free row index directly avoids ambiguous look-ups when
  # the same language/word pair occurs on more than one row.
  pickdistinct <- function(candidates, label) {
    rows <- integer(0)
    for (i in seq_len(n)) {
      free <- which(!(candidates$Word %in% candidates$Word[rows]) &
                    !(candidates$Glotto %in% candidates$Glotto[rows]))
      if (length(free) == 0) {
        stop("only ", length(rows), " of ", n, " ", label, " words from ",
             "distinct Glotto families available for meaning '", meaning, "'",
             call. = FALSE)
      }
      rows[i] <- free[1]
    }
    rows
  }

  if (onewordperarea) {
    # find n unique words which are also from n different glotto families
    correctwordrows <- pickdistinct(correct, "matching")
    incorrectwordrows <- pickdistinct(incorrect, "non-matching")
  } else {
    # first occurrence of each of the first n unique words
    correctwordrows <- match(head(correctwords, n), correct$Word)
    incorrectwordrows <- match(head(unique(incorrect$Word), n), incorrect$Word)
  }

  # keep everything except the RR columns of the other meanings; positive
  # indexing stays correct even when there is nothing to drop
  keepcolumns <- setdiff(seq_len(ncol(ordereddata)),
                         setdiff(meaning.columns, thismeaningcolumn))
  rbind(cbind(TargetMeaning=meaning, Matching=TRUE,
              correct[correctwordrows, keepcolumns, drop=FALSE]),
        cbind(TargetMeaning=meaning, Matching=FALSE,
              incorrect[incorrectwordrows, keepcolumns, drop=FALSE]))
}
# For every meaning, fill three RR classes with matching and non-matching
# words: "H" (highest target RRs), "L" (lowest target RRs) and "M" (RRs from
# the middle of the range not covered by H and L).
for (meaning in meanings) {
  print(paste("Meaning", meaning))
  # sort ascending by this meaning's RR; rows with a missing RR are dropped.
  # The name `data` is kept because code further down the script may refer
  # to it.
  data <- alldata[order(alldata[[meaning]], na.last=NA),]
  # do not call these `mean`/`median`: that would shadow the base functions
  rrmean <- mean(data[[meaning]])
  rrmedian <- median(data[[meaning]])
  print(paste("RR range from", data[1, meaning], "to", data[nrow(data), meaning],
              "mean", rrmean, "median", rrmedian))

  # simply reverse data frame order for highest
  highcell <- cbind(RRclass="H", selecttop(data[rev(seq_len(nrow(data))),], meaning))
  lowcell <- cbind(RRclass="L", selecttop(data, meaning))

  # select middle range: find the range of RRs that is not yet taken by the
  # low and high cells, bounded by these two values
  highestlow <- max(lowcell$TargetMeaningRR)
  lowesthigh <- min(highcell$TargetMeaningRR)
  # rank (row position in the sorted data) halfway between the two bounds
  untakenrangemedian <- mean(match(c(highestlow, lowesthigh), data[[meaning]]))
  # alternative: order by absolute RR difference from the centre of the
  # untaken range
  # data <- data[order(abs(data[[meaning]] - mean(c(highestlow, lowesthigh)))),]
  # order by rank difference from median (nrow(data)/2) or untakenrangemedian
  data <- data[order(abs(seq_len(nrow(data)) - untakenrangemedian)),]
  midcell <- cbind(RRclass="M", selecttop(data, meaning))

  cells <- rbind(cells, highcell, lowcell, midcell)
}
## [1] "Meaning bone"
## [1] "RR range from -2.36736894075312 to 1.26387134791426 mean -0.263666975626609 median -0.249721796954716"
## Warning in incorrectwordrows[i] <- which(incorrect$Language ==
## nextincorrectword[[1]] & : number of items to replace is not a multiple of
## replacement length
## [1] "Meaning breasts"
## [1] "RR range from -2.77010718884069 to 2.07297482061656 mean -0.335395309584245 median -0.352250311107756"
## [1] "Meaning dog"
## [1] "RR range from -2.46764753469791 to 1.75576955200288 mean -0.117006344871001 median -0.164695465190664"
## [1] "Meaning i"
## [1] "RR range from -5.11753380317081 to 1.51119327877114 mean -1.38262605491057 median -1.34345715548717"
## [1] "Meaning name"
## [1] "RR range from -3.41706133227802 to 1.51308486418524 mean -0.220013509567489 median -0.164209431621072"
## [1] "Meaning nose"
## [1] "RR range from -2.20471918813919 to 2.09233201159002 mean 0.12941416053513 median 0.13880894488938"
## [1] "Meaning tongue"
## [1] "RR range from -2.62921026006487 to 3.31591242468841 mean 0.164852303722952 median 0.142145751702432"
## [1] "Meaning we"
## [1] "RR range from -3.88497569181261 to 1.75264626903018 mean -0.350910710343393 median -0.352226981837328"
# safety check that all cells are completely filled: every meaning has
# 3 RR classes (H/L/M) x 2 matching states, each holding cellsize rows
nrow(cells) == 3 * 2 * cellsize * length(meanings)
## [1] TRUE
# and that no (phonetic) word was selected more than once for the same
# target meaning: the largest count in the meaning-by-word table must be 1
max(xtabs(~ TargetMeaning + Word, data = cells)) == 1
## [1] TRUE
Make sure that the high/mid/low RR regions do not overlap – for each target meaning (column), the six rows are the maximum and minimum target RR of the Low, Mid and High RR cells, in that order.
# put the RR classes into their natural order: low, mid, high
cells$RRclass <- factor(cells$RRclass, levels = c("L", "M", "H"))
# one column per target meaning; per RR class two rows: the largest and the
# smallest target RR selected for that class
knitr::kable(sapply(as.character(unique(cells$TargetMeaning)), function(m) {
  sapply(levels(cells$RRclass), function(rr) {
    rrs <- subset(cells, RRclass == rr & TargetMeaning == m)$TargetMeaningRR
    limits <- range(rrs)
    c(limits[2], limits[1])
  })
}))
bone | breasts | dog | i | name | nose | tongue | we |
---|---|---|---|---|---|---|---|
-0.8395966 | -1.1633815 | -0.6913152 | -2.1246413 | -1.0060987 | -0.6881875 | -0.4422010 | -0.9260865 |
-2.3673689 | -2.7701072 | -2.4676475 | -5.1175338 | -3.4170613 | -2.2047192 | -2.6292103 | -3.8849757 |
-0.1991991 | -0.2843177 | -0.0985610 | -0.9038066 | -0.1125254 | 0.1878537 | 0.3098318 | -0.1771448 |
-0.2346438 | -0.3230503 | -0.1422651 | -0.9881234 | -0.1615821 | 0.1496679 | 0.2672721 | -0.2329270 |
1.2638713 | 2.0729748 | 1.7557696 | 1.5111933 | 1.5130849 | 2.0923320 | 3.3159124 | 1.7526463 |
0.5698946 | 1.0207622 | 0.9114997 | 0.6486908 | 0.7162239 | 1.1808909 | 1.8623390 | 0.8233005 |
Check for words which made it into more than one cell due to overlap – overlap in ‘I’ can only be avoided by choosing the mid-RRs based on their rank within all of the RRs which aren’t covered by the high/low region.
# rows whose (target meaning, language, word) combination already occurred
# further up in `cells`, i.e. words that ended up in more than one cell
repeated <- duplicated(cells[, c("TargetMeaning", "Language", "Word")])
cells[repeated,
      c("RRclass", "TargetMeaning", "Meaning", "Language", "Word", "TargetMeaningRR")]
## [1] RRclass TargetMeaning Meaning Language
## [5] Word TargetMeaningRR
## <0 rows> (or 0-length row.names)
# With one word per area, no Glotto family may occur twice within the same
# RR class x target meaning x matching cell; list any rows that violate this.
# The row index needs the trailing comma: without it `[` selects columns,
# which yields a meaningless "0 columns and 1200 rows" result.
if (onewordperarea) {
  cellkeys <- cells[, c("RRclass", "TargetMeaning", "Matching", "Glotto")]
  cells[which(duplicated(cellkeys)), ]
}
## data frame with 0 columns and 1200 rows
Compare the distribution of word lengths chosen for the matching/mismatching cells for every meaning.
# counts per matching state x target meaning x RR class x word length
x <- xtabs(~ Matching + TargetMeaning + RRclass + Length, data=cells)
# total of each matching x meaning x RR class cell, recycled along the
# Length dimension so that x can be turned into within-cell proportions
celltotals <- array(rowSums(x, dims=3), dim=dim(x))
lattice::barchart(x/celltotals, auto.key=TRUE)
Finally, look at how well the individual language areas (Glotto families) are represented in the sample.
# number of selected rows per Glotto family (rows) and target meaning (columns)
perglotto <- table(cells$Glotto, cells$TargetMeaning)
# append the per-family total over all meanings as a last column
perglottowithtotal <- cbind(perglotto, total=rowSums(perglotto))
knitr::kable(perglottowithtotal)
 | bone | breasts | dog | i | name | nose | tongue | we | total |
---|---|---|---|---|---|---|---|---|---|
Abkhaz-Adyge | 0 | 2 | 1 | 1 | 1 | 0 | 0 | 0 | 5 |
Afro-Asiatic | 5 | 5 | 5 | 5 | 6 | 5 | 4 | 5 | 40 |
Aikana | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 2 |
Ainu | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Algic | 0 | 3 | 2 | 2 | 2 | 3 | 1 | 2 | 15 |
Angan | 3 | 1 | 2 | 3 | 2 | 0 | 2 | 1 | 14 |
Anson_Bay | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 4 |
Araucanian | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 5 |
Arawakan | 2 | 2 | 2 | 2 | 5 | 2 | 2 | 4 | 21 |
Arawan | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
Athapaskan-Eyak-Tlingit | 0 | 2 | 1 | 2 | 1 | 1 | 1 | 1 | 9 |
Atlantic-Congo | 6 | 6 | 5 | 6 | 6 | 6 | 6 | 6 | 47 |
Austroasiatic | 4 | 1 | 4 | 3 | 3 | 1 | 2 | 3 | 21 |
Austronesian | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 6 | 48 |
Barbacoan | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 5 |
Basque | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
Birri | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Blue_Nile_Mao | 0 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 3 |
Bogaya | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
Boran | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 3 |
Border | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Bosavi | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 2 | 5 |
Bulaka_River | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 3 |
Bunaban | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 3 |
Burushaski | 0 | 1 | 0 | 0 | 0 | 1 | 2 | 1 | 5 |
Caddoan | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 2 |
Cahuapanan | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Cariban | 3 | 3 | 2 | 3 | 0 | 2 | 4 | 1 | 18 |
Central_Sudanic | 2 | 3 | 3 | 2 | 1 | 2 | 2 | 3 | 18 |
Chapacuran | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 3 |
Chibchan | 2 | 2 | 1 | 0 | 0 | 2 | 1 | 1 | 9 |
Chimakuan | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Chocoan | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
Chukotko-Kamchatkan | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 2 |
Cochimi-Yuman | 1 | 0 | 1 | 2 | 0 | 1 | 0 | 0 | 5 |
Comecrudan | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 2 |
Daju | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 2 | 6 |
Dibiyaso | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 2 |
Dizoid | 0 | 0 | 1 | 1 | 2 | 1 | 0 | 0 | 5 |
Dogon | 0 | 0 | 2 | 0 | 1 | 0 | 1 | 1 | 5 |
Doso-Turumsa | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 2 |
Dravidian | 4 | 3 | 1 | 1 | 1 | 0 | 3 | 1 | 14 |
East_Bird’s_Head | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 3 |
East_Strickland | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 3 |
Eastern_Daly | 2 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 4 |
Eastern_Trans-Fly | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 3 |
Eleman | 1 | 1 | 1 | 1 | 0 | 1 | 2 | 2 | 9 |
Eskimo-Aleut | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 2 |
Fasu | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Gaagudju | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Giimbiyu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Goilalan | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 |
Great_Andamanese | 0 | 0 | 1 | 1 | 0 | 1 | 2 | 1 | 6 |
Guaicuruan | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
Gumuz | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 4 |
Gunwinyguan | 2 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 6 |
Hatam-Mansim | 1 | 0 | 0 | 1 | 2 | 0 | 0 | 2 | 6 |
Heiban | 0 | 1 | 2 | 2 | 1 | 1 | 2 | 2 | 11 |
Hibito-Cholon | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 2 |
Hmong-Mien | 0 | 1 | 1 | 2 | 2 | 1 | 0 | 1 | 8 |
Huavean | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 3 |
Huitotoan | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 2 |
Ijoid | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 3 |
Inland_Gulf_of_Papua | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 2 |
Iroquoian | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 4 |
Itonama | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Iwaidjan_Proper | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 3 |
Japonic | 0 | 2 | 0 | 1 | 0 | 1 | 2 | 1 | 7 |
Jivaroan | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 4 |
Jodi | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
Kadugli-Krongo | 2 | 0 | 2 | 1 | 1 | 1 | 0 | 0 | 7 |
Kaki_Ae | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Kakua-Nukak | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Kamsa | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 |
Kanoe | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
Kapauri | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 2 |
Kartvelian | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 3 |
Katla-Tima | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 3 |
Kawesqar | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Khoe-Kwadi | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 3 |
Kiowa-Tanoan | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Kiwaian | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 3 |
Klamath-Modoc | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 3 |
Kolopom | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
Koman | 0 | 2 | 1 | 0 | 0 | 2 | 2 | 1 | 8 |
Korean | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
Kosare | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2 |
Kresh-Aja | 1 | 0 | 0 | 0 | 1 | 2 | 0 | 1 | 5 |
Kujarge | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
Kuliak | 0 | 1 | 0 | 1 | 0 | 2 | 0 | 1 | 5 |
Kunama | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Kuot | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
Kusunda | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Kwalean | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 |
Lakes_Plain | 0 | 1 | 1 | 1 | 1 | 3 | 2 | 1 | 10 |
Leko | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Lengua-Mascoy | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 2 |
Lepki-Murkim | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
Limilngan | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 2 |
Lower_Sepik-Ramu | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 3 |
Maban | 0 | 2 | 0 | 0 | 0 | 2 | 2 | 0 | 6 |
Maiduan | 2 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 4 |
Mande | 2 | 0 | 3 | 2 | 2 | 1 | 1 | 1 | 12 |
Maningrida | 1 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 5 |
Manubaran | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
Marindic | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 4 |
Matacoan | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 4 |
Mayan | 0 | 1 | 1 | 1 | 2 | 2 | 0 | 1 | 8 |
Mirndi | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 3 |
Misumalpan | 0 | 0 | 1 | 3 | 0 | 1 | 0 | 1 | 6 |
Miwok-Costanoan | 3 | 2 | 1 | 0 | 1 | 0 | 2 | 1 | 10 |
Mixe-Zoque | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 4 |
Molala | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 4 |
Mombum | 0 | 2 | 1 | 0 | 2 | 2 | 0 | 0 | 7 |
Mongolic | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 3 |
Morehead-Wasur | 0 | 0 | 0 | 0 | 1 | 2 | 2 | 0 | 5 |
Mpur | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
Muniche | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 2 |
Muskogean | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 3 |
Nakh-Daghestanian | 2 | 4 | 0 | 3 | 1 | 3 | 3 | 3 | 19 |
Nambiquaran | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 2 |
Narrow_Talodi | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 4 |
Natchez | 0 | 0 | 1 | 0 | 0 | 2 | 1 | 0 | 4 |
Ndu | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 4 |
Nilotic | 3 | 1 | 5 | 2 | 3 | 0 | 2 | 3 | 19 |
Nimboran | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
Nivkh | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
North_Bougainville | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2 |
North_Halmahera | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 4 |
Northern_Daly | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 4 |
Nubian | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 3 |
Nuclear_Torricelli | 2 | 3 | 2 | 2 | 2 | 2 | 3 | 3 | 19 |
Nuclear_Trans_New_Guinea | 6 | 6 | 6 | 5 | 6 | 5 | 6 | 6 | 46 |
Nuclear-Macro-Je | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 1 | 5 |
Nyulnyulan | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 3 |
Ongota | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 |
Otomanguean | 4 | 3 | 3 | 5 | 5 | 4 | 1 | 1 | 26 |
Paez | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 |
Palaihnihan | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 2 |
Pama-Nyungan | 2 | 5 | 5 | 4 | 4 | 3 | 4 | 3 | 30 |
Panoan | 2 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 5 |
Pauwasi | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 |
Peba-Yagua | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 3 |
Puelche | 0 | 0 | 0 | 0 | 1 | 1 | 3 | 0 | 5 |
Purari | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Quechuan | 2 | 1 | 2 | 0 | 0 | 0 | 2 | 1 | 8 |
Sahaptian | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 5 |
Saharan | 0 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 9 |
Salishan | 1 | 2 | 2 | 2 | 1 | 1 | 1 | 0 | 10 |
Sandawe | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 |
Sentanic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 |
Sepik | 1 | 2 | 2 | 0 | 1 | 1 | 3 | 2 | 12 |
Sino-Tibetan | 6 | 6 | 4 | 5 | 6 | 3 | 5 | 6 | 41 |
Siouan | 1 | 1 | 0 | 2 | 1 | 2 | 3 | 0 | 10 |
Siuslaw | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 2 |
Sko | 1 | 1 | 1 | 1 | 0 | 0 | 2 | 0 | 6 |
Songhay | 1 | 1 | 0 | 0 | 1 | 0 | 2 | 1 | 6 |
South_Bird’s_Head_Family | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 2 | 7 |
South_Omotic | 0 | 0 | 0 | 2 | 1 | 1 | 1 | 1 | 6 |
Southern_Daly | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 6 |
Suki-Gogodala | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Surmic | 2 | 1 | 1 | 3 | 2 | 1 | 2 | 2 | 14 |
Ta-Ne-Omotic | 1 | 1 | 1 | 0 | 1 | 4 | 2 | 1 | 11 |
Tacanan | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 6 |
Tai-Kadai | 3 | 2 | 2 | 4 | 1 | 4 | 1 | 3 | 20 |
Taiap | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 2 |
Teberan | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3 |
Ticuna-Yuri | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
Timor-Alor-Pantar | 1 | 1 | 1 | 3 | 0 | 2 | 0 | 2 | 10 |
Timucua | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
Tirio | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
Totonacan | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 2 | 5 |
Touo | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 3 |
Tucanoan | 2 | 3 | 0 | 1 | 2 | 2 | 3 | 1 | 14 |
Tungusic | 1 | 0 | 3 | 1 | 1 | 1 | 1 | 3 | 11 |
Tunica | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 |
Tupian | 2 | 1 | 3 | 0 | 2 | 2 | 2 | 3 | 15 |
Turama-Kikori | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 6 |
Turkic | 3 | 2 | 1 | 1 | 0 | 1 | 2 | 1 | 11 |
Uralic | 1 | 3 | 1 | 3 | 1 | 2 | 4 | 0 | 15 |
Uru-Chipaya | 0 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 3 |
Uto-Aztecan | 2 | 0 | 2 | 1 | 3 | 1 | 2 | 3 | 14 |
Vilela | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
Wagiman | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 2 |
Wakashan | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
Waorani | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 2 |
Warao | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
West_Bomberai | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 3 |
Western_Daly | 0 | 0 | 2 | 1 | 3 | 0 | 0 | 2 | 8 |
Wintuan | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
Wiru | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 |
Yamana | 0 | 1 | 2 | 0 | 0 | 0 | 1 | 0 | 4 |
Yana | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 2 |
Yanomamic | 0 | 0 | 0 | 1 | 1 | 2 | 1 | 1 | 6 |
Yareban | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 2 |
Yaruro | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
Yeli_Dnye | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Yokutsan | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 5 |
Yuchi | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Yukaghir | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 2 |
Yuki-Wappo | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Yurakare | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
Zaparoan | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
# Export the selected cells.
# NOTE(review): quote=FALSE writes all fields unquoted, so a Language or Word
# value containing a comma would break the CSV - confirm none exist.
write.csv(cells, file="Exp1-meanings-by-matching-by-RR.csv", row.names=FALSE, quote=FALSE)
# Long format: one row per word and RR column (`variable` = meaning column,
# `value` = RR). Built from the complete data set rather than from the loop
# leftover `data`, which only holds the rows with a non-missing RR for the
# last meaning processed.
longdata <- reshape2::melt(alldata, id.vars="Meaning", measure.vars=meaning.columns)
# distribution of RRs per meaning (accurate words only)
#lattice::histogram(~ value | Meaning, type="count", layout=c(4, 2))
lattice::densityplot(~ value, groups=Meaning, subset(longdata, Meaning == variable),
                     plot.points=FALSE, auto.key=TRUE, xlim=c(-6, 4))
# distribution of RRs per meaning (incorrect words only)
#lattice::histogram(~ value | variable, type="count", layout=c(4, 2))
lattice::densityplot(~ value, groups=variable, subset(longdata, Meaning != variable),
                     plot.points=FALSE, auto.key=TRUE, xlim=c(-6, 4))