在 R 中为列表中的值添加标签

示例数据与计算：在 R 中为列表中的值添加标签

# Reference set: the animals recorded for each reference city.
Newyork <- c(
  "ant", "bat", "cat", "dog", "unicorn", "camel", "snake", "monkey", "donkey"
)
Tokyo <- c("unicorn")
Yokohama <- c("ant", "bat", "cat", "dog")
# "donkey" used to be written " donkey" (stray leading space), which made it a
# different string from the "donkey" listed for Newyork.
Chicago <- c(
  "bird", "ant", "bat", "cat", "bear", "dog", "snake", "monkey", "mouse",
  "donkey", "octopus", "camel"
)
Nashville <- c("ant", "bat", "octopus")
DC <- c("ant", "dog", "cat", "bird")
Boston <- c(
  "ant", "bird", "cat", "bear", "camel", "snake", "mouse", "octopus"
)

# Query set: the city that is compared against every reference city.
Elendel <- c("wolf", "dog", "ant")

# Combine the references into one list. The elements are named after their
# city so the label travels with the data; positional access such as
# Refcities[[i]] keeps working exactly as before.
Refcities <- list(
  Boston = Boston,
  Chicago = Chicago,
  DC = DC,
  Nashville = Nashville,
  Newyork = Newyork,
  Tokyo = Tokyo,
  Yokohama = Yokohama
)

代码：

# outersect: symmetric difference of two vectors.
#
# Returns the unique values that occur in exactly one of `x` and `y`: first
# the values found only in `x`, then the values found only in `y`, each in
# order of first appearance.
#
# The earlier version removed every value that was duplicated anywhere in
# c(x, y). A value repeated inside `x` alone (and absent from `y`) was
# therefore dropped even though it is not shared. Building the result from two
# setdiff() calls gives the same answer for duplicate-free inputs and the
# correct answer when an input repeats a value.
outersect <- function(x, y) {
  c(setdiff(x, y), setdiff(y, x))
}

# Unweighted similarity between two sets, built from intersect() and
# outersect(): the number of shared values minus the number of values that
# outersect() reports as not shared.
# Additional arguments supplied through `...` are accepted and ignored.
unweighted <- function(x, y, ...) {
  shared <- intersect(x, y)
  unshared <- outersect(x, y)
  length(shared) - length(unshared)
}

# Print the unweighted score of `x` against the entries of the global
# `Refcities` list, walking from position `y` up to the last entry.
# This is a plain loop (nothing here is recursive); one score is printed per
# reference and nothing visible is returned.
UWshort <- function(x, y) {
  last <- length(Refcities)
  # Nothing to do when the starting position is already past the end.
  if (y > last) {
    return(invisible(NULL))
  }
  for (pos in seq(from = y, to = last, by = 1)) {
    print(unweighted(x, Refcities[[pos]]))
  }
}

# Print Elendel's score against each entry of Refcities, starting at index 1.
UWshort(Elendel, 1)

上面的代码把一个城市的动物与 7 个参考城市逐一比较，并对每个参考城市给出一个分数：分数 =（共有的动物数量）−（不共有的动物数量）。

测试结果如下：

[1] -8 
[1] -9 
[1] -1 
[1] -3 
[1] -6 
[1] -4 
[1] -1

如何把对应的城市名称加回到结果中，并按分数的数值大小对结果排序？

理想的输出看起来象下面这样：

City  Score 
[1] DC  -1 
[2] Yokohama -1 
[3] Nashville -3 
[4] Tokyo  -4 
[5] Newyork -6 
[6] Boston -8 
[7] Chicago -9

来源

2017-02-10 Charles Williams

为什么不使用矩阵或数据框，并用向量化的函数（例如 'table'、'tapply'、'ave'、'aggregate'）来统计动物？另外，你能解释一下这些结果吗？例如，DC 的所有动物都至少与另一个城市共有，它的得分怎么会是 -1？ – Parfait

这会把城市名称作为行名（rownames）返回；如果你更希望城市名称是单独的一列，只需把行名复制到新的一列即可。

# Name each list element after its city so the names carry through to the
# scores computed below.
Refcities <- list(
  Boston = Boston, Chicago = Chicago, DC = DC, Nashville = Nashville,
  Newyork = Newyork, Tokyo = Tokyo, Yokohama = Yokohama
)

# Score every reference city against Elendel and put the best score first.
# vapply() is used instead of sapply() so the result is always a named
# integer vector with one value per city, even for an empty list.
score <- sort(
  vapply(Refcities, unweighted, integer(1), Elendel),
  decreasing = TRUE
)

# The city names become the row names of this one-column data frame.
df1 <- data.frame(score)

> df1 
      score 
DC   -1 
Yokohama  -1 
Nashville -3 
Tokyo  -4 
Newyork  -6 
Boston  -8 
Chicago  -9

来源

2017-02-10 22:09:40 manotheshark

保留你的 outersect 函数，其余部分改用 tidyverse 来完成。

# outersect: symmetric difference of two vectors.
#
# Returns the unique values that occur in exactly one of `x` and `y`: first
# the values found only in `x`, then the values found only in `y`, each in
# order of first appearance.
#
# The earlier version removed every value that was duplicated anywhere in
# c(x, y). A value repeated inside `x` alone (and absent from `y`) was
# therefore dropped even though it is not shared. Building the result from two
# setdiff() calls gives the same answer for duplicate-free inputs and the
# correct answer when an input repeats a value.
outersect <- function(x, y) {
  c(setdiff(x, y), setdiff(y, x))
}

# Reference set: the animals recorded for each reference city.
Newyork <- c(
  "ant", "bat", "cat", "dog", "unicorn", "camel", "snake", "monkey", "donkey"
)
Tokyo <- c("unicorn")
Yokohama <- c("ant", "bat", "cat", "dog")
# "donkey" used to be written " donkey" (stray leading space), which made it a
# different string from the "donkey" listed for Newyork.
Chicago <- c(
  "bird", "ant", "bat", "cat", "bear", "dog", "snake", "monkey", "mouse",
  "donkey", "octopus", "camel"
)
Nashville <- c("ant", "bat", "octopus")
DC <- c("ant", "dog", "cat", "bird")
Boston <- c(
  "ant", "bird", "cat", "bear", "camel", "snake", "mouse", "octopus"
)

# Query set: the city that is compared against every reference city.
Elendel <- c("wolf", "dog", "ant")

library(tidyverse)

# Combine the references into a list whose elements are named after the city.
Refcities <- list(
  Boston = Boston,
  Chicago = Chicago,
  DC = DC,
  Nashville = Nashville,
  Newyork = Newyork,
  Tokyo = Tokyo,
  Yokohama = Yokohama
)
# City labels are derived from the list itself, so they cannot drift out of
# step with the order of the data.
cities <- names(Refcities)

# Build one row per reference city, compute the score, and sort best-first.
# - tibble() replaces data_frame(), which is deprecated in the tibble package.
# - vapply() (instead of sapply()) pins each count to a single integer.
df <- tibble(
  City = cities,
  # Number of animals the city shares with Elendel.
  inter = vapply(
    Refcities,
    function(x) length(intersect(x, Elendel)),
    integer(1),
    USE.NAMES = FALSE
  ),
  # Number of animals that are not shared between the city and Elendel.
  outer = vapply(
    Refcities,
    function(x) length(outersect(x, Elendel)),
    integer(1),
    USE.NAMES = FALSE
  )
) %>%
  mutate(Score = inter - outer) %>%
  arrange(desc(Score)) %>%
  select(City, Score)

来源

2017-02-11 05:41:05

在 R 中为列表中的值添加标签

回答

相关问题