2014-08-28 69 views
0

如何在 R 中以某种程度的自动化方式合并因子水平?我的数据集中有一个变量,它大约有 2000 个因子水平,但其中许多水平非常相似:

"7" "BE Classifieds - Village Voice - Display" 
"8" "Bridgevine Online" 
"9" "British Columbia BNE - Group 1" 
"10" "British Columbia BNE - Group 10" 
"11" "British Columbia BNE - Group 11" 
"12" "British Columbia BNE - Group 12" 
"13" "British Columbia BNE - Group 13" 
"14" "British Columbia BNE - Group 14" 
"15" "British Columbia BNE - Group 2" 
"16" "British Columbia BNE - Group 3" 
"17" "British Columbia BNE - Group 4" 
"18" "British Columbia BNE - Group 5" 
"19" "British Columbia BNE - Group 6" 
"20" "British Columbia BNE - Group 7" 
"21" "British Columbia BNE - Group 8" 
"22" "British Columbia BNE - Group 9" 
"23" "buyjustenergydefault" 
"24" "C2CBetaBrands" 
"25" "C2CBetaElectrcity" 
"26" "C2CBetaJE-ES" 
"27" "CallerID Callback - Energy Savings" 
"28" "Choose Energy Transfers" 
"29" "Choose Energy Warm Transfers" 
"30" "Clear Corporate Teletech Telesales Transfer to JE" 
"31" "Clear Internet Live Transfer - TX, GA" 
"32" "Clear Internet Transfer - Non-ISTA" 
"33" "commenergy" 
"34" "Commercial to Residential Transfer" 
"35" "Coreg" 
"36" "Customer Service-Energy Savings" 
"37" "CW Leads - Non transfer (ISTA)" 
"38" "CW Leads - Non transfer (Non-ISTA)" 
"39" "D* Just Clean In-Line" 
"40" "D* Transfer - Scripting Test" 
"41" "Default/Unknown Program" 
"42" "DirectStar Live Transfer - Retentions" 
"43" "DStar (FC) Transfer" 
"44" "DStar Leads - Non transfer (ISTA)" 
"45" "DStar Leads - Non transfer (Non-ISTA)" 
"46" "DStar Outbound Friends and Family" 
"47" "Dstar to Energy Georgia Transfer" 
"48" "DStar Transfer - Non-ISTA" 
"49" "DStar Transfer - TX, GA" 
"50" "electricdotcomwebsitedefault" 
"51" "ES-AEP-HAILO-GOOG-DTOP-Competitors-AB" 
"52" "ES-AEP-HAILO-GOOG-DTOP-Nonbrand-AB" 
"53" "ES-AEP-HAILO-GOOG-MOBI-NT-Competitor-AB" 
"54" "ES-AEP-HAILO-GOOG-MOBI-NT-Competitors-AB-Unspecified" 
"55" "ES-AEP-HAILO-GOOG-MOBI-NT-Nonbrand-AB" 
"56" "es-albertaenergyprovidersca-webdefault" 
"57" "ES-CEDS-HAILO-GOOG-DTOP-Brand-GA" 
"58" "ES-CEDS-HAILO-GOOG-DTOP-Brand-NJ" 
"59" "ES-CEDS-HAILO-GOOG-DTOP-Brand-PA" 
"60" "ES-CEDS-HAILO-GOOG-DTOP-Competitors-GA" 
"61" "ES-CEDS-HAILO-GOOG-DTOP-Competitors-NJ" 
"62" "ES-CEDS-HAILO-GOOG-DTOP-Content-GA" 
"63" "ES-CEDS-HAILO-GOOG-DTOP-NonBrand-GA" 
"64" "ES-CEDS-HAILO-GOOG-DTOP-Nonbrand-NJ" 
"65" "ES-CEDS-HAILO-GOOG-DTOP-Nonbrand-PA" 
"66" "ES-CEDS-HAILO-GOOG-MOBI-NT-Brand-GA" 
"67" "ES-CEDS-HAILO-GOOG-MOBI-NT-Brand-NJ" 
"68" "ES-CEDS-HAILO-GOOG-MOBI-NT-Brand-PA" 
"69" "ES-CEDS-HAILO-GOOG-MOBI-NT-Competitors-GA" 
"70" "ES-CEDS-HAILO-GOOG-MOBI-NT-Competitors-GA-Unspecified" 
"71" "ES-CEDS-HAILO-GOOG-MOBI-NT-Competitors-NJ" 
"72" "ES-CEDS-HAILO-GOOG-MOBI-NT-Competitors-NJ-Unspecified" 
"73" "ES-CEDS-HAILO-GOOG-MOBI-NT-Competitors-PA" 
"74" "ES-CEDS-HAILO-GOOG-MOBI-NT-Nonbrand-GA" 
"75" "ES-CEDS-HAILO-GOOG-MOBI-NT-Nonbrand-NJ" 
"76" "ES-CEDS-HAILO-GOOG-MOBI-NT-Nonbrand-PA" 
"77" "ES-CEDS-HAILO-MSN-DTOP-Brand-GA" 
"78" "ES-CEDS-HAILO-MSN-DTOP-Brand-NJ" 
"79" "ES-CEDS-HAILO-MSN-DTOP-Brand-PA" 
"80" "ES-CEDS-HAILO-MSN-DTOP-Competitors-GA" 
"81" "ES-CEDS-HAILO-MSN-DTOP-Competitors-NJ" 
"82" "ES-CEDS-HAILO-MSN-DTOP-Nonbrand-GA" 
"83" "ES-CEDS-HAILO-MSN-DTOP-Nonbrand-NJ" 
"84" "ES-CEDS-HAILO-MSN-DTOP-Nonbrand-PA" 
"85" "ES-EYSG-HAILO-GOOG-DTOP-Competitors-NY" 
"86" "ES-EYSG-HAILO-GOOG-DTOP-Competitors-TX" 
"87" "ES-EYSG-HAILO-GOOG-DTOP-Nonbrand-TX" 
"88" "ES-Google-JE-AB" 
"89" "es-launchpad-electriccompanycom-webdefault" 
"90" "ES-NAT-DTOP-DENT-eEXST-NONE-justenergy.com-011B" 
"91" "ES-NAT-DTOP-DENT-eEXST-NONE-justenergy.com-059nsAB" 
"92" "ES-NAT-DTOP-DENT-eGEOS[direct-energy-alberta]-NONE-albertaenergyproviders.com-DEF" 
"93" "ES-NAT-DTOP-DENT-eHOME-NONE-EnergySavings.com-DEF" 
"94" "ES-NAT-DTOP-DENT-eHOME-NONE-JustEnergy.com-011B" 
"95" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-021nsNY" 
"96" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-024nsOH" 
"97" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-025nsMA" 
"98" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-034nsON" 
"99" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-036nsAB" 
"100" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-037nsNJ" 
"101" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-046nsAB" 
"102" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-053AnsTX" 
"103" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-059nsAB" 
"104" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-086BnsTX" 
"105" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-DEFnsCA" 
"106" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-DEFnsIL" 
"107" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-DEFnsIN" 
"108" "ES-NAT-DTOP-DENT-eHOME-NONE-justenergy.com-DEFnsPA" 
"109" "ES-NAT-DTOP-DENT-eHOME-NONE-newyork.justenergy.com-DEF" 
"110" "ES-NAT-DTOP-DENT-eHOME-NONE-saveonenergy.com-DEF{IL}Just" 
"111" "ES-NAT-DTOP-DENT-eHOME-NONE-saveonenergy.com-DEF{MA}Just" 
"112" "ES-NAT-DTOP-DENT-eHOME-NONE-saveonenergy.com-DEF{NJ}Commerce" 
"113" "ES-NAT-DTOP-DENT-eHOME-NONE-saveonenergy.com-DEF{PA}Commerce" 
"114" "ES-NAT-DTOP-DENT-eHOME-NONE-saveonenergy.com-DEF{PA}Just" 
"115" "ES-NAT-DTOP-DENT-eHOME-NONE-texas.justenergy.com-DEF" 
"116" "ES-NAT-DTOP-DENT-eOTHR-NONE-EnergySavings.com-DEF" 
"117" "ES-NAT-DTOP-DENT-eOTHR-NONE-JustEnergy.com-011B" 
"118" "ES-NAT-DTOP-DENT-eOTHR-NONE-justenergy.com-021nsNY" 
"119" "ES-NAT-DTOP-DENT-eOTHR-NONE-justenergy.com-046nsAB" 
"120" "ES-NAT-DTOP-DENT-eOTHR-NONE-justenergy.com-059nsAB" 
"121" "ES-NAT-DTOP-DENT-eOTHR-NONE-saveonenergy.com-DEF{MA}Just" 
"122" "ES-NAT-DTOP-DENT-eOTHR-NONE-saveonenergy.com-DEF{NJ}Commerce" 
"123" "ES-NAT-DTOP-REFR-eEXST-OTHR-justenergy.com-024nsOH" 
"124" "ES-NAT-DTOP-REFR-eEXST-OTHR-justenergy.com-059nsAB" 
"125" "ES-NAT-DTOP-REFR-eGEOS-OTHR-saveonenergy.com-DEF{NJ}Commerce" 
"126" "ES-NAT-DTOP-REFR-eGEOS-OTHR-saveonenergy.com-DEF{PA}Commerce" 
"127" "ES-NAT-DTOP-REFR-eGEOS-OTHR-saveonenergy.com-DEF{PA}Just" 
"128" "ES-NAT-DTOP-REFR-eHOME-ENRG-JustEnergy.com-011B" 
"129" "ES-NAT-DTOP-REFR-eHOME-ENRG-justenergy.com-046nsAB" 
"130" "ES-NAT-DTOP-REFR-eHOME-ENRG-justenergy.com-DEFnsCA" 
"131" "ES-NAT-DTOP-REFR-eHOME-OTHR-EnergySavings.com-DEF" 
"132" "ES-NAT-DTOP-REFR-eHOME-OTHR-JustEnergy.com-011B" 
"133" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-021nsNY" 
"134" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-024nsOH" 
"135" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-025nsMA" 
"136" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-037nsNJ" 
"137" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-046nsAB" 
"138" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-054nsON" 
"139" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-059nsAB" 
"140" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-074nsTX" 
"141" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-086BnsTX" 
"142" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-DEFnsIL" 
"143" "ES-NAT-DTOP-REFR-eHOME-OTHR-justenergy.com-DEFnsPA" 
"144" "ES-NAT-DTOP-REFR-eHOME-OTHR-newyork.justenergy.com-DEF" 
"145" "ES-NAT-DTOP-REFR-eOTHR-ENRG-JustEnergy.com-011B" 
"146" "ES-NAT-DTOP-REFR-eOTHR-OTHR-EnergySavings.com-DEF" 
"147" "ES-NAT-DTOP-REFR-eOTHR-OTHR-JustEnergy.com-011B" 
"148" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-021nsNY" 
"149" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-025nsMA" 
"150" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-036nsAB" 
"151" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-046nsAB" 
"152" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-059nsAB" 
"153" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-DEFnsIN" 
"154" "ES-NAT-DTOP-REFR-eOTHR-OTHR-justenergy.com-DEFnsPA" 
"155" "ES-NAT-DTOP-REFR-eOTHR-OTHR-newyorkenergyrates.com-DEF" 
"156" "ES-NAT-DTOP-REFR-eOTHR-OTHR-saveonenergy.com-DEF{IL}Just" 
"157" "ES-NAT-DTOP-REFR-eOTHR-OTHR-saveonenergy.com-DEF{NJ}Commerce" 
"158" "ES-NAT-DTOP-REFR-eOTHR-OTHR-saveonenergy.com-DEF{PA}Commerce" 
"159" "ES-NAT-DTOP-REFR-eOTHR-OTHR-saveonenergy.com-DEF{TX}Just" 
"160" "ES-NAT-DTOP-SENG-eEXST-BING-justenergy.com-054nsON" 
"161" "ES-NAT-DTOP-SENG-eEXST-BING-justenergy.com-059nsAB" 
"162" "ES-NAT-DTOP-SENG-eEXST-GOOG-justenergy.com-011B" 
"163" "ES-NAT-DTOP-SENG-eEXST-GOOG-justenergy.com-021nsNY" 
"164" "ES-NAT-DTOP-SENG-eEXST-GOOG-justenergy.com-059nsAB" 
"165" "ES-NAT-DTOP-SENG-eEXST-GOOG-justenergy.com-DEFnse 

所以我想知道,如何把所有以某个前缀开头的水平合并成一个水平,例如所有以“British Columbia BNE - ...”开头的水平。

我已经看过 `combine.levels` 和 `varclus`,但我认为它们并不是合适的函数……有没有办法按前缀来合并水平?

+1

所以,您需要定义一条计算机能够理解的、非常具体的规则来说明如何折叠(合并)水平。如果您唯一的需求是合并以“British Columbia BNE”开头的水平,那是可以做到的;但如果您想要一个适用于所有水平的更通用的解决方案,我们需要您提供更多说明:哪些水平可以被折叠,以及应当如何做出这一判断。 – MrFlick 2014-08-28 17:07:26

+0

你也许可以取每个水平的前 10 个字符(或者你认为合适的任意字符数)作为子字符串,然后把这些子字符串作为一个水平数更少的新因子? – lawyeR 2014-08-28 18:13:50

+0

@MrFlick 如果你知道如何针对“British Columbia BNE”做到这一点,那么我就可以把同样的方法套用到其他前缀上,比如“ES-CEDS-HAILO”。 – Moderat 2014-08-28 19:01:30

回答

0

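我想通了。使用 `idx = grep(prefix, data.frame)` 获得匹配该前缀的元素的索引,然后用 `data.frame[idx] = blah` 给这些元素赋上合并后的新值。(例如,对因子 `x` 可以写成 `idx <- grep("^British Columbia BNE", levels(x)); levels(x)[idx] <- "British Columbia BNE"`。)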