I have a messy set of strings, as follows.
string <- c("grp-14994/", "grp-7056 grp-7036/", "grp-24263(24263)/irgc 28588", "grp-15916 /irgc-42176", "grp-614-250b/", "( grp 11432)/irgc-14570", "tourn", "grpp256", "purse", "grp-14956 origin:", "grp 10537", "grp-10096 origin: ", "sgrp123", "grp1234", "ac-30009 (grphana)/", "ac-3060 grp 536-143/old ac", "rgrpfaa/23", "/-", "mgr:7251/", "1216-gr-567/", "x:1 kgrph", "wabgrpvea(ii)", "hr33(bgrp)", "tensor", "wald", "grp12312")
I am trying to extract the instances of "grp" followed by digits, which may be separated by a space or "-".
My current attempt gives me the following result.
gsub("(.*)(\\b)(grp)(-|\\s|)(\\d+)(\\/|\\b)(.*)","\\3\\5", string, ignore.case = t) [1] "grp14994" "grp7056" "grp24263" "grp15916" [5] "grp614" "grp11432" "tourn" "grpp256" [9] "purse" "grp14956" "grp10537" "grp10096" [13] "sgrp123" "grp1234" "ac-30009 (grphana)/" "grp536" [17] "rgrpfaa/23" "/-" "mgr:7251/" "1216-gr-567/" [21] "x:1 kgrph" "wabgrpvea(ii)" "hr33(bgrp)" "tensor" [25] "wald" "grp12312"
But the desired output is:
out <- c("grp14994", "grp7056 grp7036", "grp24263", "grp15916", "grp614250", "grp11432", "", "", "", "grp14956", "grp10537", "grp10096", "", "grp1234", "", "grp536143", "", "", "", "", "", "", "", "", "", "grp12312") out [1] "grp14994" "grp7056 grp7036" "grp24263" "grp15916" "grp614250" "grp11432" [7] "" "" "" "grp14956" "grp10537" "grp10096" [13] "" "grp1234" "" "grp536143" "" "" [19] "" "" "" "" "" "" [25] "" "grp12312"
How can I modify the regex to get the desired result?
unlist(lapply(str_extract_all(string,"[gg][rr][pp][-\\s]?\\d+"), function (x) { gsub("[-\\s]+(\\d)", "\\1", paste(x, collapse= " "),perl=t) })) [1] "grp14994" "grp7056 grp7036" "grp24263" [4] "grp15916" "grp614" "grp11432" [7] "" "" "" [10] "grp14956" "grp10537" "grp10096" [13] "grp123" "grp1234" "" [16] "grp536" "" "" [19] "" "" "" [22] "" "" "" [25] "" "grp12312"
Comments
Post a Comment