0
我正在做一个项目,使用聚类和KNN算法进行文本分类,但我的分类结果不正确。目前我只是用每个字母的ASCII值来进行分类,实际上只是根据数据的大小在分类。除此之外,我对这个项目了解不多,希望能获得一些关于KNN分类的信息。(标题:在Matlab中使用KNN进行文本分类)
我已经获得了 Reuters-21578 数据集的一个子集(Reuters Transcribed Subset),但不知道接下来该怎么做。
这是我的示例代码
function [out2, out3, tme] = knnk_latests(data, foll)
%KNNK_LATESTS Match one text against a folder tree of training documents.
%   [OUT2, OUT3, TME] = KNNK_LATESTS(DATA, FOLL) reads every file inside
%   each sub-folder of FOLL, gives every training document its own group
%   number, and calls KNNCLASSIFY (default settings) to pick the training
%   document closest to DATA. The sub-folder that holds the chosen
%   document is reported as the class.
%
%   Inputs
%     DATA - text to classify; CHAR(DATA) must produce exactly one row.
%     FOLL - training folder name, resolved against the current folder.
%            Defaults to 'tempdat' when omitted or empty.
%
%   Outputs
%     OUT2 - name of the sub-folder (class) containing the matched document.
%     OUT3 - OUT2 followed by the matched file's name without its extension.
%     TME  - char vector: elapsed seconds divided by the matched document
%            index, multiplied by 1000 (see the review note in the body).
%
%   Documents are compared through the numeric codes of their characters;
%   the shorter side is right-padded with spaces (code 32) so that both
%   matrices have the same number of columns.
%
%   Raises an error when the training folder does not exist, contains no
%   documents, a file cannot be opened, or DATA is not a single row.

tic;

if nargin < 2 || isempty(foll)
    foll = 'tempdat';
end

% fullfile keeps the path portable instead of hard-coding '\'.
root = fullfile(pwd, foll);
if exist(root, 'dir') ~= 7
    error('knnk_latests:folderNotFound', ...
        'Training folder not found: %s', root);
end

% Keep only real class folders. '.' and '..' are not guaranteed to be the
% first two entries returned by dir, so they are filtered out by name.
entries = dir(root);
isClassDir = [entries.isdir] & ~ismember({entries.name}, {'.', '..'});
classDirs = entries(isClassDir);

docs = {};                                  % one row char vector per document
docNames = {};                              % file name of each document
hlp_count = zeros(numel(classDirs), 1);     % number of documents per class

for j = 1:numel(classDirs)
    classPath = fullfile(root, classDirs(j).name);

    % List the folder by path; the working directory is never changed, so
    % an error while reading cannot strand the caller somewhere else.
    files = dir(classPath);
    files = files(~[files.isdir]);

    for i = 1:numel(files)
        filePath = fullfile(classPath, files(i).name);
        fid = fopen(filePath);
        if fid < 0
            error('knnk_latests:cannotOpen', ...
                'Cannot open file: %s', filePath);
        end
        closer = onCleanup(@() fclose(fid));    % close even if fread fails
        ct = fread(fid, 10000000, 'uint8=>char');
        clear closer;                           % closes the file now

        % fread returns a column vector; each document must be a row so
        % that char() below stacks documents as rows of a matrix.
        docs{end + 1} = reshape(char(ct), 1, []);   %#ok<AGROW>
        docNames{end + 1} = files(i).name;          %#ok<AGROW>
        hlp_count(j) = hlp_count(j) + 1;
    end
end

if isempty(docs)
    error('knnk_latests:noTrainingData', ...
        'No training documents found under: %s', root);
end

% Stack documents into a space-padded char matrix, one document per row.
% Collecting them in a cell (instead of joining with a text delimiter and
% splitting again) keeps row k aligned with the k-th file that was read,
% even when a file is empty.
trainset = char(docs(:));
lngt = size(trainset, 1);

% Drop a single leading space from each row, keeping the row width.
if size(trainset, 2) > 0
    lead = trainset(:, 1) == ' ';
    trainset(lead, :) = [trainset(lead, 2:end), repmat(' ', nnz(lead), 1)];
end

samples = char(data);
if size(samples, 1) ~= 1
    error('knnk_latests:singleSampleOnly', ...
        'DATA must convert to exactly one row of text (got %d rows).', ...
        size(samples, 1));
end
if ~isempty(samples) && samples(1) == ' '
    samples = [samples(2:end), ' '];
end

% Convert characters to their numeric codes and pad the narrower matrix
% with spaces (32) so sample and training rows have equal length.
trainset1 = double(trainset);
samples1 = double(samples);
widthGap = size(samples1, 2) - size(trainset1, 2);
if widthGap > 0
    trainset1 = [trainset1, 32 * ones(lngt, widthGap)];
else
    samples1 = [samples1, 32 * ones(1, -widthGap)];
end

% Every training document is its own group, so the returned label is the
% row index of the matched document.
% NOTE(review): knnclassify comes from a toolbox, not base MATLAB — confirm
% it is available in the target release.
grp = (1:lngt)';
label = knnclassify(samples1, trainset1, grp);

% NOTE(review): dividing the elapsed time by the matched document index is
% kept exactly as in the original implementation; confirm it is intended.
tme = num2str(toc / label * 1000);

% Translate the global document index into (class folder, index in folder).
upperBound = cumsum(hlp_count);
classIdx = find(label <= upperBound, 1);
hllp = label - (upperBound(classIdx) - hlp_count(classIdx));

out2 = classDirs(classIdx).name;

% fileparts strips the extension whatever its length.
[~, hlp_nme] = fileparts(docNames{label});

fprintf('\n');
fprintf('\n');
fprintf('Output:- ');
fprintf('\n');
fprintf(2, 'The input is matched to the class : ');
disp(out2);
fprintf(2, 'Sub class : ');
out3 = [out2 hlp_nme];
outt3 = [out2 ',' num2str(hllp)];
disp(outt3);
fprintf('\n');