An implementation of the ID3 Decision Tree algorithm in Java from scratch.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

307 lines
10 KiB

4 years ago
  1. __kernel void splitDataSet(__global char *in, __global int *out, const int rowSpan,
  2. const int colSpan, __global char *pat, const int pat_length, __global char* out_char,
  3. __global char* nullChar)
  4. {
  5. const int idx = get_global_id(0);
  6. int start = (idx * rowSpan);
  7. int end = start + rowSpan;
  8. int N = rowSpan;
  9. int M = pat_length;
  10. bool found = false;
  11. char nullCharTemp = nullChar[0];
  12. if(in[start]!=nullCharTemp)
  13. {
  14. for(int k=start;k<end;k++)
  15. {
  16. int j;
  17. /* For current index i, check for pattern match */
  18. for (j = 0; j < M; j++)
  19. {
  20. if (in[k + j] != pat[j])
  21. break;
  22. }
  23. if (j == M)
  24. {
  25. out[idx] = 1;
  26. found = true;
  27. break;
  28. }
  29. }
  30. }
  31. if(found)
  32. {
  33. for(int k=start;k<end;k++)
  34. {
  35. out_char[k] = in[k];
  36. }
  37. }
  38. if(!found)
  39. {
  40. out[idx] = 0;
  41. for(int k=start;k<end;k++)
  42. {
  43. out_char[k] = nullCharTemp;
  44. }
  45. //out_char[start] = ' ';
  46. }
  47. }
  48. __kernel void sumReduction(__global int *A, __global int* C, int offset)
  49. {
  50. int global_id = get_global_id(0);
  51. //if(get_global_size(0) < 5)
  52. // printf("Thread id = %d", global_id);
  53. int start = global_id * offset;
  54. int end = start + offset;
  55. int i;
  56. for(i=start; i<end; i++)
  57. {
  58. C[global_id] += A[i];
  59. }
  60. }
  61. __kernel void foldKernel(__global int *arVal, int offset)
  62. {
  63. int gid = get_global_id(0);
  64. arVal[gid] = arVal[gid]+arVal[gid+offset];
  65. }
  66. __kernel void allOneValues(__global char* examples, __global int* yesArray, __global int* noArray, const int rowSpan,
  67. __global char* yes, __global char* no, __global char* nullChar)
  68. {
  69. const int idx = get_global_id(0);
  70. int start = (idx * rowSpan);
  71. int end = start + rowSpan;
  72. int N = rowSpan;
  73. int M = 16;
  74. bool found = false;
  75. char nullCharTemp = nullChar[0];
  76. if(examples[start]!=nullCharTemp)
  77. {
  78. for(int k=start;k<end;k++)
  79. {
  80. int j;
  81. /* For current index i, check for pattern match */
  82. for (j = 0; j < M; j++)
  83. {
  84. if (examples[k + j] != yes[j])
  85. break;
  86. }
  87. if (j == M)
  88. {
  89. //out[idx] = 1;
  90. found = true;
  91. break;
  92. }
  93. }
  94. }
  95. if(found)
  96. {
  97. for(int k=start;k<end;k++)
  98. {
  99. //out_char[k] = in[k];
  100. }
  101. //printf("Found at %d", idx);
  102. yesArray[idx] = 1;
  103. noArray[idx] = 0;
  104. }
  105. if(!found)
  106. {
  107. //out[idx] = 0;
  108. for(int k=start;k<end;k++)
  109. {
  110. //out_char[k] = nullCharTemp;
  111. }
  112. //out_char[start] = ' ';
  113. yesArray[idx] = 0;
  114. noArray[idx] = 1;
  115. }
  116. }
  117. __kernel void splitDataSetNew(__global char *examplesChar, __global int* examplesInt,
  118. const int rowSpan, __global char *pat,
  119. const int pat_length, __global int* examplesViInt)
  120. {
  121. const int idx = get_global_id(0);
  122. int start = (idx * rowSpan);
  123. int end = start + rowSpan;
  124. int N = rowSpan;
  125. int M = pat_length;
  126. bool found = false;
  127. if(examplesInt[idx] == 1)
  128. {
  129. for(int k=start;k<end;k++)
  130. {
  131. int j;
  132. /* For current index i, check for pattern match */
  133. for (j = 0; j < M; j++)
  134. {
  135. if (examplesChar[k + j] != pat[j])
  136. break;
  137. }
  138. if (j == M)
  139. {
  140. //out[idx] = 1;
  141. found = true;
  142. break;
  143. }
  144. }
  145. }
  146. if(found)
  147. {
  148. examplesViInt[idx] = 1;
  149. }
  150. else
  151. {
  152. examplesViInt[idx] = 0;
  153. }
  154. }
  155. __kernel void splitDataSet3(__global char *examplesChar, __global int* examplesInt,
  156. const int rowSpan, const int cellSpan, const int index, __global char *pat,
  157. const int pat_length, __global int* examplesViInt)
  158. {
  159. const int idx = get_global_id(0);
  160. int start = (idx * rowSpan + (2*index+1)*cellSpan);
  161. int end = start + cellSpan;
  162. //int N = rowSpan;
  163. int M = pat_length;
  164. bool found = false;
  165. if(examplesInt[idx] == 1)
  166. {
  167. for(int k=start;k<end;k++)
  168. {
  169. int j;
  170. /* For current index i, check for pattern match */
  171. for (j = 0; j < M; j++)
  172. {
  173. if (examplesChar[k + j] != pat[j])
  174. break;
  175. }
  176. if (j == M)
  177. {
  178. //out[idx] = 1;
  179. found = true;
  180. examplesViInt[idx] = 1;
  181. break;
  182. }
  183. }
  184. }
  185. if(!found)
  186. {
  187. examplesViInt[idx] = 0;
  188. }
  189. }
  190. __kernel void splitDataSet4(__global char *examplesChar, __global int* examplesInt,
  191. const int rowSpan, const int cellSpan, const int index, __global char *pat,
  192. const int pat_length,
  193. __global char* yes, __global int* yesArray,
  194. __global int* noArray, int targetIndex)
  195. {
  196. const int idx = get_global_id(0);
  197. int start = (idx * rowSpan + (2*index+1)*cellSpan);
  198. int end = start + cellSpan;
  199. //int N = rowSpan;
  200. int M = pat_length;
  201. bool found = false;
  202. if(examplesInt[idx] == 1)
  203. {
  204. for(int k=start;k<end;k++)
  205. {
  206. int j;
  207. /* For current index i, check for pattern match */
  208. for (j = 0; j < M; j++)
  209. {
  210. if (examplesChar[k + j] != pat[j])
  211. break;
  212. }
  213. if (j == M)
  214. {
  215. //out[idx] = 1;
  216. found = true;
  217. //examplesViInt[idx] = 1;
  218. }
  219. }
  220. }
  221. if(found)
  222. {
  223. int start = (idx * rowSpan + (2*targetIndex+1)*cellSpan);
  224. int end = start + cellSpan;
  225. int M = 6;
  226. bool yesFound = false;
  227. for(int k=start;k<end;k++)
  228. {
  229. int j;
  230. /* For current index i, check for pattern match */
  231. for (j = 0; j < M; j++)
  232. {
  233. if (examplesChar[k + j] != yes[j])
  234. break;
  235. }
  236. if (j == M)
  237. {
  238. //out[idx] = 1;
  239. //found = true;
  240. //examplesViInt[idx] = 1;
  241. yesArray[idx] = 1;
  242. noArray[idx] = 0;
  243. yesFound = true;
  244. break;
  245. }
  246. }
  247. if(!yesFound){
  248. noArray[idx] = 1;
  249. yesArray[idx] = 0;
  250. }
  251. }
  252. else{
  253. yesArray[idx] = 0;
  254. noArray[idx] = 0;
  255. }
  256. }
  257. __kernel void getCount(__global char *examplesChar, __global int* examplesInt,
  258. const int rowSpan, const int cellSpan, const int index, __global char *pat,
  259. const int pat_length, __global int* countArray)
  260. {
  261. const int idx = get_global_id(0);
  262. int start = (idx * rowSpan + (2*index+1)*cellSpan);
  263. int end = start + cellSpan;
  264. //int N = rowSpan;
  265. int M = pat_length;
  266. bool found = false;
  267. if(examplesInt[idx] == 1)
  268. {
  269. for(int k=start;k<end;k++)
  270. {
  271. int j;
  272. /* For current index i, check for pattern match */
  273. for (j = 0; j < M; j++)
  274. {
  275. if (examplesChar[k + j] != pat[j])
  276. break;
  277. }
  278. if (j == M)
  279. {
  280. //out[idx] = 1;
  281. found = true;
  282. countArray[idx] = 1;
  283. break;
  284. }
  285. }
  286. }
  287. if(!found)
  288. {
  289. countArray[idx] = 0;
  290. }
  291. }