# Keyword network based on PubMed literature overlap.
#
# For each search keyword the script fetches the matching PubMed IDs, counts
# how many IDs every pair of keywords shares, and draws the keywords as a
# graph: an edge joins two keywords that share at least one PubMed ID, and the
# node size reflects the number of distinct IDs found for the keyword.

library(rentrez)
library(igraph)
library(ggraph)

# Search terms ----

keywords <- c(
  "Pi uptake", "Pi absorb", "Pi uptake ability", "Phosphate absorb ability",
  "32P orthophosphate", "32P uptake", "33P uptake", "32P radioactivity",
  "Isotope 32P", "Isotope 33P", "33P orthophosphate", "33P radioactivity",
  "orthophosphate plant", "Pi uptake plant", "Pi absorb plant",
  "Pi transport plant", "Pi uptake ability plant",
  "Phosphate absorb ability plant", "32P plant", "32P orthophosphate plant",
  "32P uptake plant", "33P uptake plant", "32P radioactivity plant",
  "Isotope 32P plant", "Isotope 33P plant", "P redistribution plant",
  "33P plant", "33P orthophosphate plant", "33P radioactivity plant",
  "32P uptake assay", "32P uptake analysis", "32P uptake assay plant",
  "32P uptake analysis plant", "33P uptake assay", "33P uptake analysis",
  "33P uptake assay plant", "33P uptake analysis plant"
)

# PubMed queries ----

#' Fetch the PubMed IDs matching a search term
#'
#' @param keyword Search term passed to `entrez_search()` as `term`.
#' @param max_results Upper bound on returned IDs (passed as `retmax`).
#' @return The `ids` element of the search result.
get_pubmed_ids <- function(keyword, max_results = 10000) {
  search_result <- entrez_search(
    db = "pubmed",
    term = keyword,
    retmax = max_results
  )
  # Pause after every request; presumably this keeps the script under NCBI's
  # request-rate limit -- confirm before shortening.
  Sys.sleep(0.5)

  # A truncated ID list silently understates overlaps, so make it visible.
  if (isTRUE(search_result$count > length(search_result$ids))) {
    warning(
      "PubMed reports ", search_result$count, " hits for '", keyword,
      "' but only ", length(search_result$ids), " IDs were returned.",
      call. = FALSE
    )
  }
  search_result$ids
}

# lapply() always yields a list. sapply() would collapse the result into a
# vector or matrix whenever every keyword returns the same number of IDs,
# which breaks the `[[keyword]]` lookups below.
keyword_ids <- setNames(
  lapply(keywords, get_pubmed_ids, max_results = 10000),
  keywords
)

# Pairwise overlap ----

#' Count the PubMed IDs shared by two keywords
#'
#' @param keyword1,keyword2 Names of the two keywords to compare.
#' @param ids Named list of ID vectors, one per keyword. Defaults to the
#'   script-level `keyword_ids`.
#' @return Number of IDs present in both keywords' ID vectors.
calculate_overlap <- function(keyword1, keyword2, ids = keyword_ids) {
  length(intersect(ids[[keyword1]], ids[[keyword2]]))
}

n_keywords <- length(keywords)
overlap_matrix <- matrix(
  0,
  nrow = n_keywords,
  ncol = n_keywords,
  dimnames = list(keywords, keywords)
)

# Overlap is symmetric, so compute each unordered pair once and mirror it.
# The diagonal stays 0. `seq_len(n - i) + i` is i+1..n and is empty for i == n.
for (i in seq_len(n_keywords)) {
  for (j in seq_len(n_keywords - i) + i) {
    shared <- calculate_overlap(keywords[i], keywords[j])
    overlap_matrix[i, j] <- shared
    overlap_matrix[j, i] <- shared
  }
}

# Network construction ----

#' Build the keyword graph and per-keyword document counts
#'
#' @param overlap_matrix Square matrix of pairwise shared-ID counts, with rows
#'   and columns in the order of `keywords`.
#' @param keyword_ids List of ID vectors, one per keyword.
#' @param keywords Character vector of keyword names.
#' @return A list with `graph` (undirected igraph object built from the pairs
#'   with overlap > 0) and `doc_counts` (number of unique IDs per element of
#'   `keyword_ids`).
create_network <- function(overlap_matrix, keyword_ids, keywords) {
  doc_counts <- vapply(
    keyword_ids,
    function(ids) length(unique(ids)),
    integer(1)
  )

  n <- length(keywords)
  # as.vector() walks the matrix column by column, so `to` follows the row
  # index and `from` the column index.
  # NOTE(review): the matrix is symmetric, so every overlapping pair is listed
  # in both directions and becomes two parallel edges. Kept as-is so the
  # rendered edge opacity does not change.
  edges <- data.frame(
    from = rep(keywords, each = n),
    to = rep(keywords, times = n),
    weight = as.vector(overlap_matrix)
  )
  edges <- edges[edges$weight > 0, ]

  g <- graph_from_data_frame(edges, directed = FALSE)
  list(graph = g, doc_counts = doc_counts)
}

result <- create_network(overlap_matrix, keyword_ids, keywords)
g <- result$graph

if (vcount(g) == 0) {
  stop("No keyword pair shares any PubMed ID; nothing to plot.", call. = FALSE)
}

# The graph only contains keywords that take part in at least one edge, so
# look the counts up by vertex name instead of assuming that the vertices
# match `keywords` one-to-one.
node_sizes <- unname(result$doc_counts[V(g)$name])
V(g)$size <- node_sizes / max(node_sizes) * 50
V(g)$label <- V(g)$name
V(g)$doc_count <- node_sizes

# Plot ----

ggraph(g, layout = "kk") +
  geom_edge_link(width = 0.2, alpha = 0.03, color = "#4D4E4F") +
  geom_node_point(aes(size = doc_count), color = "#EB7E60") +
  geom_node_text(
    aes(label = name),
    size = 3,
    color = "black",
    repel = TRUE,
    max.overlaps = 1000
  ) +
  theme_void() +
  theme(legend.position = "none") +
  labs(title = "Keyword Network based on PubMed Literature Overlap")