getDocumentTermMatrix <- function (text.vector, document.names=NULL) {
  # Function to convert a vector of characters into a document-term matrix
  # ARGS
  # text.vector    - A vector of type character (other atomic vectors, e.g. factors, are
  #                  coerced with as.character). Each element should be a document.
  #                  NA documents are treated as empty documents
  # document.names - A vector containing the names to be assigned to the documents. It must
  #                  have one entry per document. If not provided, the default names
  #                  "Doc1", "Doc2", ... are used
  #
  # RETURN
  #   The function returns a list with two elements:
  #     doc.term.matrix - integer matrix with one row per document and one column per word;
  #                       each cell holds the number of occurrences of the word in the document
  #     words           - character vector with the distinct words, in order of first occurrence
  #
  # NOTES
  #   Words are the non-empty pieces obtained by splitting each document on single blank
  #   spaces. No other normalisation (case, punctuation, tabs, newlines) is applied.
  #   An empty text.vector yields a 0 x 0 matrix and an empty word list.
  #

  # Validate the names before doing any work, so a bad call fails fast
  if (!is.null(document.names) && length(document.names) != length(text.vector)) {
    stop("The length of the two vectors provided has to be the same")
  }

  # strsplit only accepts character input, so coerce first (e.g. factors)
  text.vector <- as.character(text.vector)

  # First step, split the documents using blank space
  tokens <- strsplit(text.vector, " ", fixed = TRUE)

  # Determine the words in the documents. Empty strings (produced by consecutive
  # blanks) and NA (produced by NA documents) are not words. as.character() turns
  # the NULL that unlist() returns for an empty input into character(0)
  words <- unique(as.character(unlist(tokens, use.names = FALSE)))
  words <- words[!is.na(words) & words != ""]

  # Count the occurrences of every word in one document. match() maps each token
  # to its position in 'words' (NA for tokens that are not words) and tabulate()
  # counts the positions, silently ignoring the NA entries
  countWords <- function(word.vector) {
    tabulate(match(word.vector, words), nbins = length(words))
  }

  # Apply the function to all the documents and stack the counts by rows.
  # as.integer() keeps the data a valid (empty) vector when there are no documents
  counts <- lapply(tokens, FUN = countWords)
  doc.term.matrix <- matrix(as.integer(unlist(counts, use.names = FALSE)),
                            nrow = length(tokens),
                            ncol = length(words),
                            byrow = TRUE)

  # Set the names of the documents (rows) and of the words (columns)
  if (is.null(document.names)) {
    # seq_len() is safe for zero documents, unlike 1:nrow(...)
    document.names <- paste0("Doc", seq_len(nrow(doc.term.matrix)))
  }
  dimnames(doc.term.matrix) <- list(document.names, words)

  return(list(doc.term.matrix = doc.term.matrix, words = words))
}

# Usage example: build a document-term matrix from a CSV file of tweets.
# The first column of the file is used as the text of the documents.

tweets <- read.csv(file = "tweets_CEC2017.csv", stringsAsFactors = FALSE)

# Take the first column as a plain vector and compute the word counts
res <- getDocumentTermMatrix(text.vector = tweets[[1]])

# Show the first rows of the count matrix, followed by the full list of words
head(res[["doc.term.matrix"]])
res[["words"]]