############################################################## # An Instance Theory of Semantic Memory # Jamieson et al. (2018, Computational Brain and Behaviour) ############################################################## # Clear the work environment rm(list = ls()) # Name the corpus Corpus <- "tasaDocsPara.txt" # Set parameters N_dimensions <- 20000 Tau <- 3 Forgetting_rate <- 0 # Load useful libraries (tm and LSAfun need to be installed) library(tm) library(LSAfun) # Get unique words in the corpus Unique_words <- unique(scan(file=Corpus, what="char", sep="")) # Construct the environmental word vectors from a Gaussian with mean = 0 and sd = 1/sqrt(n) Words <- matrix(rnorm(length(Unique_words)*N_dimensions, 0, 1/sqrt(N_dimensions)), length(Unique_words), N_dimensions) rownames(Words) <- Unique_words # Get all exemplar-documents in the corpus Sentences <- scan(file=Corpus, what="char", sep="\n") # Construct exemplar-document record of the corpus Memory <- matrix(0, length(Sentences), N_dimensions) for (i in 1:length(Sentences)) { Tmp <- strsplit(Sentences[i], " ")[[1]] if (length(Tmp) > 0) { for (j in 1:length(Tmp)) { Memory[i,] <- Memory[i,] + Words[Tmp[j],] } } } rownames(Memory) <- Sentences # Vector cosine Cosine <- function (x, y) { z <- 0 if (sum(abs(x)) != 0 & sum(abs(y)) != 0) z <- sum(x*y) / sqrt(sum(x*x) * sum(y*y)) return(z) } # Get cosine similarity table for all rows in a matrix of semantic vectors Cosine_table <- function (M) { Tmp <- matrix(0, nrow(M), nrow(M)) for (i in 1:nrow(M)) { for (j in 1:nrow(M)) { if (sum(abs(M[i,])) != 0 & sum(abs(M[j,])) != 0) Tmp[i,j] <- sum(M[i,]*M[j,]) / sqrt(sum(M[i,]*M[i,]) * sum(M[j,]*M[j,])) } } rownames(Tmp) <- rownames(M) colnames(Tmp) <- rownames(M) return(Tmp) } # Introduce forgetting to memory (Forgetting_rate = 1 - L in the original MINERVA 2 model) p <- matrix(runif(nrow(Memory)*ncol(Memory), 0, 1), nrow(Memory), ncol(Memory)) Memory <- Memory * ifelse(p < Forgettting_rate, 0, 1) rm(p) # Get semantic vectors (P = List of probe words, W = Words matrix, M = Memory matrix, Tau = Retrieval gradient) Semantic_subset <- function (P, W, M, Tau) { S <- matrix(0, length(P), ncol(M)) for (i in 1:length(P)) { Probe <- unlist(strsplit(P[i], split="/")) if (all(Probe %in% rownames(W)) == TRUE) { for (j in 1:nrow(M)) { A <- 1.0 for (k in 1:length(Probe)) { A <- A * Cosine(W[Probe[k],], M[j,])**Tau } S[i,] <- S[i,] + A * M[j,] } } } rownames(S) <- P S <- S[abs(rowSums(S)) != 0,] return(S) } # Words from Jones & Mewhort (2007, Figure 3) to demonstrate taxonomic clustering JonesMewhort_Figure3 <- c("financial","savings","finance","pay","invested", "loaned","borrow","lend","invest","investments", "bank","spend","save","astronomy","physics", "chemistry","psychology","biology","scientific", "mathematics","technology","scientists","science", "research","sports","team","teams","football", "coach","sport","players","baseball","soccer", "tennis","basketball") # Get semantic space Probe_list <- JonesMewhort_Figure3 Semantic_space <- Semantic_subset(P = Probe_list, W = Words, M = Memory, Tau = 3) # Plot the results as a clustered correlation plot corrplot::corrplot(Cosine_table(Semantic_space), method="circle", order="hclust", tl.col="black", tl.cex=.4, title="", cl.cex=.5) # Plot the results as a 2d MDS solution plot_wordlist(rownames(Semantic_space), tvectors=Semantic_space, method="MDS", dims=2)