Lan Zagar committed a952f31

Added scripts for experiments.

Comments (0)

Files changed (4)

_multitask/__init__.py

 from __future__ import absolute_import
-
-import pkg_resources
-
+from pkg_resources import resource_filename
 import Orange
-
 from . import mtfeat
 
 
 def datasets():
-    yield ('multitask', pkg_resources.resource_filename(__name__, 'datasets'))
+    yield ('multitask', resource_filename(__name__, 'datasets'))
 
 def split_by_task(data, task_id='task'):
     """

setup.py

     name = "multitask",
     packages = ["_multitask"],
     package_data = {},
-    version = "0.3",
+    version = "0.4",
     description = "Orange add-on for multi-task learning",
     author = "Lan Zagar",
     author_email = "lan.zagar@fri.uni-lj.si",

tests/generate_data.py

+from numpy import dot, zeros, column_stack, vstack, concatenate, random as rnd
+
+import Orange
+
+
+def to_table(X, y=None):
+    n, m = X.shape
+    features = [Orange.feature.Continuous('f%i' % i) for i in range(m)]
+    if y is None:
+        return Orange.data.Table(Orange.data.Domain(features, False), X)
+    else:
+        return Orange.data.Table(
+            Orange.data.Domain(features, Orange.feature.Continuous('class')),
+            column_stack((X, y)))
+
+class Generator(object):
+    def generate_matrix(self, **kwargs):
+        data = self.generate_table(**kwargs)
+        return data.to_numpy()[:2]
+    
+    def generate_table(self, **kwargs):
+        data = self.generate_matrix(**kwargs)
+        return to_table(*data)
+
+    def __call__(self, orange=True, **kwargs):
+        if orange:
+            return self.generate_table(**kwargs)
+        else:
+            return self.generate_matrix(**kwargs)
+
+class Group(Generator):
+    def __init__(self, groups=10, pergroup=20, factors=[5, 4, 3, 2, 1],
+                 tasks=20, overlap=2, seed=42):
+        self.groups = groups
+        self.pergroup = pergroup
+        self.factors = factors
+        self.m = groups * (pergroup - overlap) + overlap
+        self.tasks = tasks
+        self.seed = seed
+
+        rnd.seed(seed)
+        self.transform = zeros((self.m, sum(factors)))
+        f = 0
+        for i, k in enumerate(factors):
+            start = i * (pergroup - overlap)
+            self.transform[start:start + pergroup, f:f + k] = \
+                rnd.normal(0, 1, (pergroup, k))
+            f += k
+        mus = rnd.normal(0, 1, sum(factors))
+        self.weights = rnd.normal(mus, 1, (tasks, sum(factors)))
+        self.intercepts = rnd.normal(100, 20, tasks)
+
+    def get_model(self):
+        return self.transform, self.weights, self.intercepts
+
+    def generate_matrix(self, pertask=50, seed=None):
+        if seed is None:
+            seed = self.seed
+        rnd.seed(seed)
+        Xs = [rnd.normal(0, 1, (pertask, self.m))
+              for _ in range(self.tasks)]
+        ys = [dot(dot(X, self.transform), w) + i
+              for X, w, i in zip(Xs, self.weights, self.intercepts)]
+        X = vstack(Xs)
+        y = concatenate(ys)
+        y += rnd.normal(0, 1, len(y))
+        return (X, y)
+
+    def __call__(self, **kwargs):
+        data = self.generate_table(**kwargs)
+        values=[str(i) for i in range(self.tasks)]
+        task = Orange.feature.Discrete('task', values=values)
+        id = Orange.feature.Descriptor.new_meta_id()
+        data.domain.add_meta(id, task)
+        for ins, t in zip(data, (str(i) for i in range(self.tasks)
+                                 for _ in range(len(data) // self.tasks))):
+            ins[task] = t
+        return data
+

tests/test_groups.py

+import Orange
+from Orange.multitask import MultiTaskLearner, mtfeat
+
+import generate_data
+
+
+groups, pergroup, tasks, overlap = 10, 10, 10, 2
+factors = [5,4,3,2,1]
+data = generate_data.Group(groups, pergroup, factors, tasks, overlap=overlap)(pertask=20)
+
+mean = Orange.regression.mean.MeanLearner()
+mtmean = MultiTaskLearner(learner=mean)
+ridge = Orange.regression.linear.LinearRegressionLearner(ridge_lambda=1)
+mtridge = MultiTaskLearner(learner=ridge)
+mtf = mtfeat.MTFeatLearner(max_iter=50, gamma=1, intercept=True)
+mtfs = mtfeat.MTFeatLearner(max_iter=50, gamma=1, selection=True, intercept=True)
+mtfg = mtfeat.MTFeatLearner(max_iter=50, gamma=1, intercept=True, groups=[
+    range(i * (pergroup - overlap), (i + 1) * pergroup - i * overlap)
+    for i in range(groups)])
+
+learners = [mean, mtmean, ridge, mtridge, mtfs, mtf, mtfg]
+res = Orange.evaluation.testing.cross_validation(learners, data, folds=4, store_classifiers=1)
+print Orange.evaluation.scoring.RMSE(res)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.