Mercurial > repos > stevecassidy > nltktools
diff test-data/sample_text_frequency.dat @ 0:e991d4e60c17 draft
planemo upload commit 0203cb3a0b40d9348674b2b098af805e2986abca-dirty
author | stevecassidy |
---|---|
date | Wed, 12 Oct 2016 22:17:53 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_text_frequency.dat Wed Oct 12 22:17:53 2016 -0400 @@ -0,0 +1,294 @@ +Word Count Percent +the 44 6.32 +of 26 3.74 +and 25 3.59 +. 24 3.45 +to 23 3.30 +a 15 2.16 +, 12 1.72 +for 12 1.72 +will 12 1.72 +is 11 1.58 +DADA 9 1.29 +some 8 1.15 +( 7 1.01 +be 7 1.01 +on 7 1.01 +that 7 1.01 +this 7 1.01 +Australian 7 1.01 +) 7 1.01 +The 7 1.01 +text 6 0.86 +project 6 0.86 +we 6 0.86 +infrastructure 6 0.86 +from 6 0.86 +have 6 0.86 +in 6 0.86 +video 5 0.72 +language 5 0.72 +data 5 0.72 +it 5 0.72 +collection 5 0.72 +annotation 5 0.72 +Corpus 4 0.57 +with 4 0.57 +build 4 0.57 +audio 4 0.57 +hope 3 0.43 +collections 3 0.43 +resources 3 0.43 +funding 3 0.43 +available 3 0.43 +English 3 0.43 +meta-data 3 0.43 +Macquarie 3 0.43 +done 3 0.43 +two 3 0.43 +corpus 3 0.43 +part 3 0.43 +work 3 0.43 +up 3 0.43 +at 3 0.43 +- 3 0.43 +code 2 0.29 +people 2 0.29 +We 2 0.29 +but 2 0.29 +has 2 0.29 +them 2 0.29 +example 2 0.29 +words 2 0.29 +using 2 0.29 +now 2 0.29 +collect 2 0.29 +each 2 0.29 +corpora 2 0.29 +year 2 0.29 +server 2 0.29 +new 2 0.29 +public 2 0.29 +by 2 0.29 +search 2 0.29 +store 2 0.29 +involves 2 0.29 +within 2 0.29 +texts 2 0.29 +support 2 0.29 +Language 2 0.29 +sentences 2 0.29 +freely 2 0.29 +National 2 0.29 +funded 2 0.29 +site 2 0.29 +an 2 0.29 +as 2 0.29 +able 2 0.29 +make 2 0.29 +subjects 2 0.29 +speech 2 0.29 +development 2 0.29 +recording 2 0.29 +I 2 0.29 +significant 2 0.29 +task 2 0.29 +provide 2 0.29 +ARC 2 0.29 +demo 1 0.14 +automatically 1 0.14 +What 1 0.14 +Service 1 0.14 +being 1 0.14 +both 1 0.14 +soon 1 0.14 +existing 1 0.14 +large 1 0.14 +via 1 0.14 +looks 1 0.14 +Haugh 1 0.14 +still 1 0.14 +find 1 0.14 +alignment 1 0.14 +web 1 0.14 +Recently 1 0.14 +writing 1 0.14 +linguistics 1 0.14 +only 1 0.14 +going 1 0.14 +systems 1 0.14 +under 1 0.14 +Using 1 0.14 +2011 1 0.14 +take 1 0.14 +move 1 0.14 +around 1 0.14 +get 1 0.14 +read 1 0.14 +providing 1 0.14 +Michael 1 0.14 +number 1 0.14 +Project 1 0.14 +next 1 0.14 +While 1 0.14 +Oz 1 0.14 +communities 1 0.14 +comes 1 0.14 +projects 1 0.14 +articles 1 0.14 +like 1 0.14 +visible 1 0.14 +manual 1 0.14 +solution 1 0.14 +'ve 1 0.14 +capability 1 0.14 +these 1 0.14 +continue 1 0.14 +steps 1 0.14 +common 1 0.14 +small 1 0.14 +Speech 1 0.14 +fixed 1 0.14 +Griffith 1 0.14 +searching 1 0.14 +core 1 0.14 +doing 1 0.14 +Since 1 0.14 +idea 1 0.14 +All 1 0.14 +titles 1 0.14 +are 1 0.14 +picked 1 0.14 +Some 1 0.14 +network 1 0.14 +renamed 1 0.14 +managing 1 0.14 +sites 1 0.14 +publish 1 0.14 +research 1 0.14 +Later 1 0.14 +AusNC 1 0.14 +written 1 0.14 +between 1 0.14 +technology 1 0.14 +reading 1 0.14 +can 1 0.14 +recently 1 0.14 +repository 1 0.14 +partners 1 0.14 +This 1 0.14 +University 1 0.14 +hosted 1 0.14 +free 1 0.14 +box 1 0.14 +exposing 1 0.14 +technical 1 0.14 +study 1 0.14 +allows 1 0.14 +forced 1 0.14 +Sign 1 0.14 +published 1 0.14 +map 1 0.14 +MQ 1 0.14 +month 1 0.14 +interviews 1 0.14 +software 1 0.14 +already 1 0.14 +useful 1 0.14 +secure 1 0.14 +'black 1 0.14 +primary 1 0.14 +whatever 1 0.14 +Update 1 0.14 +1000 1 0.14 +parties 1 0.14 +loaded 1 0.14 +centralised 1 0.14 +Auslan 1 0.14 +1900 1 0.14 +size 1 0.14 +little 1 0.14 +Australia 1 0.14 +initial 1 0.14 +been 1 0.14 +Early 1 0.14 +their 1 0.14 +station 1 0.14 +down 1 0.14 +basic 1 0.14 +collected 1 0.14 +: 1 0.14 +Data 1 0.14 +ANDS 1 0.14 +more 1 0.14 +describe 1 0.14 +HCSNet 1 0.14 +denoting 1 0.14 +interviewed 1 0.14 +Trevor 1 0.14 +bitbucket 1 0.14 +testing 1 0.14 +Johnston 1 0.14 +effort 1 0.14 +pilot 1 0.14 +upgrades 1 0.14 +main 1 0.14 +look 1 0.14 +developing 1 0.14 +reliable 1 0.14 +pace 1 0.14 +while 1 0.14 +technoogy 1 0.14 +install 1 0.14 +Our 1 0.14 +transcripts 1 0.14 +country 1 0.14 +descriptions 1 0.14 +due 1 0.14 +documentation 1 0.14 +allowed 1 0.14 +sample 1 0.14 +enable 1 0.14 +create 1 0.14 +demonstration 1 0.14 +Map 1 0.14 +speakers 1 0.14 +inside 1 0.14 +end 1 0.14 +sessions 1 0.14 +things 1 0.14 +permission 1 0.14 +feature 1 0.14 +who 1 0.14 +started 1 0.14 +which 1 0.14 +digital 1 0.14 +many 1 0.14 +outside 1 0.14 +used 1 0.14 +'s 1 0.14 +separate 1 0.14 +collaboration 1 0.14 +after 1 0.14 +driver 1 0.14 +needs 1 0.14 +moment 1 0.14 +important 1 0.14 +designed 1 0.14 +tidying 1 0.14 +services 1 0.14 +elicit 1 0.14 +AusTalk 1 0.14 +expand 1 0.14 +stereo 1 0.14 +natural 1 0.14 +' 1 0.14 +third 1 0.14 +later 1 0.14 +game 1 0.14 +An 1 0.14 +As 1 0.14 +so 1 0.14 +Big 1 0.14 +allow 1 0.14 +sets 1 0.14