Commits

Anonymous committed 352d23c

First draft of full outline


Files changed (5)

 \relax 
-\@writefile{toc}{\contentsline {chapter}{\numberline {1}Atomic findings}{2}}
+\@writefile{toc}{\contentsline {chapter}{\numberline {1}Atomic findings}{3}}
 \@writefile{lof}{\addvspace {10\p@ }}
 \@writefile{lot}{\addvspace {10\p@ }}
-\@writefile{toc}{\contentsline {section}{\numberline {1.1}Our aggregated address book is a Semantic Web\xspace  application}{2}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.2}Availability of RDF data sources is a problem}{2}}
-\newlabel{availability_of_rdf}{{1.2}{2}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.1}Our aggregated address book is a Semantic Web\xspace  application}{3}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.2}Availability of RDF data sources is a problem}{3}}
+\newlabel{availability_of_rdf}{{1.2}{3}}
 \citation{Kinsella08}
 \citation{Hogan07}
-\@writefile{toc}{\contentsline {section}{\numberline {1.3}Modeling in RDF is an expert-task}{3}}
-\newlabel{modeling_expert_task}{{1.3}{3}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.4}Data integration is a problem at three levels}{3}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.5}Structural integration}{4}}
-\newlabel{structural_integration}{{1.5}{4}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.6}Syntactical integration}{4}}
-\newlabel{syntactical_integration}{{1.6}{4}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.7}Semantical integration}{5}}
-\newlabel{semantical_integration}{{1.7}{5}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.8}Automated reasoning depends on expressivity of underlying standards}{7}}
-\newlabel{reasoning_expressivity}{{1.8}{7}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.9}Automated reasoning performance is a tricky topic}{7}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.10}SPARQL (potentially) requires a lot of queries to fetch a single data set}{7}}
-\newlabel{many_queries_required}{{1.10}{7}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.3}Modeling in RDF is an expert-task}{4}}
+\newlabel{modeling_expert_task}{{1.3}{4}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.4}Data integration is a problem at three levels}{4}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.5}Structural integration}{5}}
+\newlabel{structural_integration}{{1.5}{5}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.6}Syntactical integration}{5}}
+\newlabel{syntactical_integration}{{1.6}{5}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.7}Semantical integration}{6}}
+\newlabel{semantical_integration}{{1.7}{6}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.8}Automated reasoning depends on expressivity of underlying standards}{8}}
+\newlabel{reasoning_expressivity}{{1.8}{8}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.9}Automated reasoning performance is a tricky topic}{8}}
+\newlabel{reasoning_performance}{{1.9}{8}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.10}SPARQL (potentially) requires a lot of queries to fetch a single data set}{8}}
+\newlabel{many_queries_required}{{1.10}{8}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.11}User interfaces can fuse interface elements and data on server or on client}{9}}
+\newlabel{ui_fusion}{{1.11}{9}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.12}User interfaces can be domain-independent or domain-dependent}{9}}
+\newlabel{ui_domain_dependence}{{1.12}{9}}
 \citation{activerdf}
 \citation{Oren07}
 \citation{activerdf}
-\@writefile{toc}{\contentsline {section}{\numberline {1.11}User interfaces can fuse interface elements and data on server or on client}{8}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.12}Object-triple mapping to make RDF data compatible with existing web application frameworks}{8}}
-\newlabel{otm}{{1.12}{8}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.13}Requirements for publishing optimally reusable data}{9}}
-\newlabel{publishing_requirements}{{1.13}{9}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.14}Publishing data via SPARQL endpoint}{10}}
-\newlabel{publishing_sparql}{{1.14}{10}}
-\@writefile{toc}{\contentsline {subsubsection}{The good}{10}}
-\@writefile{toc}{\contentsline {subsubsection}{The bad}{10}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.15}Publishing data via Linked Data}{11}}
-\@writefile{toc}{\contentsline {subsubsection}{The good}{11}}
-\@writefile{toc}{\contentsline {subsubsection}{The bad}{12}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.16}Publishing data via custom API}{12}}
-\@writefile{toc}{\contentsline {subsubsection}{The good}{12}}
-\@writefile{toc}{\contentsline {subsubsection}{The bad}{13}}
-\@writefile{toc}{\contentsline {section}{\numberline {1.17}There is no all-round publishing solution}{13}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.13}Object-triple mapping to make RDF data compatible with existing web application frameworks}{10}}
+\newlabel{otm}{{1.13}{10}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.14}Data source properties}{10}}
+\newlabel{source_properties}{{1.14}{10}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.15}Publishing types and their properties}{11}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.16}Linked Data and custom API make assumptions about data model}{11}}
+\newlabel{publisher_assumptions}{{1.16}{11}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.17}Consumer types and their properties}{12}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.18}Publishing type converters}{12}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.19}Publishing for optimal reusability}{12}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.20}Publishing data via SPARQL endpoint}{13}}
+\newlabel{publishing_sparql}{{1.20}{13}}
+\@writefile{toc}{\contentsline {subsubsection}{The good}{13}}
+\@writefile{toc}{\contentsline {subsubsection}{The bad}{14}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.21}Publishing data via Linked Data}{15}}
+\@writefile{toc}{\contentsline {subsubsection}{The good}{15}}
+\@writefile{toc}{\contentsline {subsubsection}{The bad}{15}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.22}Publishing data via custom API}{16}}
+\@writefile{toc}{\contentsline {subsubsection}{The good}{16}}
+\@writefile{toc}{\contentsline {subsubsection}{The bad}{16}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.23}Consuming distributed data}{17}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.24}Consuming distributed data via query federation}{17}}
+\newlabel{query_federation}{{1.24}{17}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.25}Consuming distributed data via caching locally}{17}}
 \citation{Heitmann09}
-\@writefile{toc}{\contentsline {section}{\numberline {1.18}Integration providers can reduce complexity}{14}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.26}Integration providers can reduce complexity}{18}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.27}Data integration can be achieved in an elegant manner}{19}}
+\@writefile{toc}{\contentsline {paragraph}{Follows from}{19}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.28}Separation between application logic and data model}{19}}
+\newlabel{separation_of_concerns}{{1.28}{19}}
+\@writefile{toc}{\contentsline {section}{\numberline {1.29}Internal models are (relatively) flexible}{20}}
+\newlabel{flexible_model}{{1.29}{20}}
 \bibcite{Kinsella08}{1}
 \bibcite{Hogan07}{2}
 \bibcite{activerdf}{3}

thesis-v3.dvi

Binary file modified.
-This is pdfTeXk, Version 3.141592-1.40.3 (Web2C 7.5.6) (format=latex 2010.5.7)  2 JUL 2010 02:11
+This is pdfTeXk, Version 3.141592-1.40.3 (Web2C 7.5.6) (format=latex 2010.5.7)  8 JUL 2010 01:11
 entering extended mode
  %&-line parsing enabled.
 **thesis-v3.tex
 ] (./thesis-v3.toc
 LaTeX Font Info:    External font `cmex10' loaded for size
 (Font)              <10.95> on input line 2.
-)
+
+Overfull \hbox (0.82945pt too wide) in paragraph at lines 13--13
+ [][] []\T1/cmr/m/n/10.95 User in-ter-faces can be domain-independent or domain
+-dependent [][]  
+ []
+
+[1
+
+])
 \tf@toc=\write3
 \openout3 = `thesis-v3.toc'.
 
-
-[1
-
-]
+ [2]
 Chapter 1.
 
 Overfull \hbox (1.36787pt too wide) in paragraph at lines 42--42
 er-nal sources,
  []
 
-[2
+[3
 
 ]
-
-LaTeX Warning: Reference `separation_of_concerns' on page 3 undefined on input 
-line 82.
-
 LaTeX Font Info:    External font `cmex10' loaded for size
 (Font)              <9> on input line 99.
 LaTeX Font Info:    External font `cmex10' loaded for size
 (Font)              <5> on input line 99.
-[3]
-
-LaTeX Warning: Reference `separation_of_concerns' on page 4 undefined on input 
-line 127.
-
-
-LaTeX Warning: Reference `flexible_model' on page 4 undefined on input line 131
-.
-
-
-Overfull \hbox (0.29942pt too wide) in paragraph at lines 148--150
+ [4]
+Overfull \hbox (0.29942pt too wide) in paragraph at lines 149--151
 []\T1/cmr/m/n/10.95 F.e. if the do-main of ev-ery foaf:mbox prop-
  []
 
-[4]
-
-LaTeX Warning: Reference `separation_of_concerns' on page 5 undefined on input 
-line 175.
-
-
-LaTeX Warning: Reference `flexible_model' on page 5 undefined on input line 179
-.
-
-[5]
-
-LaTeX Warning: Reference `separation_of_concerns' on page 6 undefined on input 
-line 205.
-
-
-LaTeX Warning: Reference `flexible_model' on page 6 undefined on input line 209
-.
-
-[6] [7]
-
-LaTeX Warning: Reference `flexible_model' on page 8 undefined on input line 295
-.
-
-
-Overfull \hbox (2.78566pt too wide) in paragraph at lines 302--302
+[5] [6] [7] [8] [9]
+Overfull \hbox (2.78566pt too wide) in paragraph at lines 327--327
 []\T1/cmr/bx/n/14.4 Object-triple map-ping to make RDF data com-
  []
 
-[8]
 
-LaTeX Warning: Reference `ecosystem' on page 9 undefined on input line 321.
+LaTeX Warning: Reference `ecosystem' on page 10 undefined on input line 346.
 
+[10] [11]
 
-Overfull \hbox (6.44614pt too wide) in paragraph at lines 331--331
-[]\T1/cmr/bx/n/14.4 Requirements for pub-lish-ing op-ti-mally reusable
- []
+LaTeX Warning: Reference `publishing_linkeddata' on page 12 undefined on input 
+line 450.
 
 
-LaTeX Warning: Reference `publishing_linkeddata' on page 9 undefined on input l
-ine 363.
+LaTeX Warning: Reference `publishing_customapi' on page 12 undefined on input l
+ine 450.
 
 
-LaTeX Warning: Reference `publishing_customapi' on page 9 undefined on input li
-ne 363.
+LaTeX Warning: Reference `publishing_linkeddata' on page 12 undefined on input 
+line 454.
 
-[9] [10] [11] [12]
 
-LaTeX Warning: Reference `publishing_linkeddata' on page 13 undefined on input 
-line 498.
+LaTeX Warning: Reference `publishing_customapi' on page 12 undefined on input l
+ine 456.
 
+[12]
 
-LaTeX Warning: Reference `publishing_customapi' on page 13 undefined on input l
-ine 500.
+LaTeX Warning: Reference `publishing_requirements' on page 13 undefined on inpu
+t line 480.
 
-[13] [14] [15
+[13] [14] [15] [16] [17] [18] [19] [20] [21
 
 ] (./thesis-v3.aux)
 
 
  ) 
 Here is how much of TeX's memory you used:
- 3076 strings out of 95087
- 43586 string characters out of 1183279
- 92783 words of memory out of 1500000
- 6222 multiletter control sequences out of 10000+50000
+ 3083 strings out of 95087
+ 43717 string characters out of 1183279
+ 92865 words of memory out of 1500000
+ 6229 multiletter control sequences out of 10000+50000
  20194 words of font info for 47 fonts, out of 1200000 for 2000
  28 hyphenation exceptions out of 8191
  27i,8n,32p,337b,383s stack positions out of 5000i,500n,6000p,200000b,5000s
 
-Output written on thesis-v3.dvi (16 pages, 40596 bytes).
+Output written on thesis-v3.dvi (22 pages, 55232 bytes).
 
 \begin{itemize}
   \item Responsible for structural integration: rewriting data as RDF.
+  \item Give example from case study: e-mail addresses.
   \item No usable generic components available. \todo{Illustrate landscape and
     explain why it is not usable.}
   \item Therefore they have to be developed, which means they are on the
 component because no generic components are available.
 
 \begin{itemize}
+  \item Give example from case study: e-mail addresses.
   \item This is a development task, which means it is on the application
     developer's plate.
     \begin{itemize}
 Both can be achieved using automated reasoning:
 
 \begin{itemize}
+  \item Give example from case study: multiple e-mail addresses
   \item Integration logic can be defined declaratively, in the language of the
     data, while the actual processing can be delegated to a generic software
     component (a reasoning engine). \todo{Give examples of both alignment and
 \todo{Check chapter 5 of the Working Ontologist book.}
 
 \section{Automated reasoning performance is a tricky topic}
+\label{reasoning_performance}
 
 \begin{itemize}
   \item In case of forward reasoning: after every data update (create, update,
 
 \section{User interfaces can fuse interface elements and data on server or on
 client}
+\label{ui_fusion}
 
 \begin{description}
   \item[Server] Data does not need to be serialized in order to be transferred
     morph\footnote{\url{http://convert.test.talis.com/}}.
 \end{description}
 
+\section{User interfaces can be domain-independent or domain-dependent}
+\label{ui_domain_dependence}
+
+Domain-independent:
+
+\begin{itemize}
+  \item Search
+  \item Visualization
+  \item Faceted browsing
+\end{itemize}
+
+Anything with requirements beyond these three options needs a domain-dependent
+user interface:
+
+\begin{itemize}
+  \item makes assumptions about the data model
+  \item these assumptions are hard-coded at the point where interface elements
+    and data are fused (see section \ref{ui_fusion})
+  \item illustrate using example from case study: e-mail addresses (a minimal
+    sketch follows below)
+\end{itemize}
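+
+A minimal sketch of the e-mail example (names and data purely illustrative): a
+domain-dependent, server-side view in which the foaf:name/foaf:mbox structure
+is a hard-coded assumption of the rendering code.
+
+\begin{verbatim}
+from rdflib import Graph, Namespace
+
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+
+def render_contact_list(graph):
+    # Domain-dependent view: the foaf:name / foaf:mbox structure is a
+    # hard-coded assumption of this rendering code.
+    rows = []
+    for person in graph.subjects(FOAF.mbox, None):
+        name = graph.value(person, FOAF.name)
+        for mbox in graph.objects(person, FOAF.mbox):
+            rows.append("<li>%s: %s</li>" % (name, mbox))
+    return "<ul>" + "".join(rows) + "</ul>"
+\end{verbatim}
+
+If interface elements and data were fused on the client instead, the same
+assumption would simply move into the client-side code (see section
+\ref{ui_fusion}).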
 
 \section{Object-triple mapping to make RDF data compatible with existing web
 application frameworks}
 \end{itemize}
 
 
-\section{Requirements for publishing optimally reusable data}
-\label{publishing_requirements}
 
-Published data is optimally reusable if the same publication can be consumed by
-all of the following in a workable fashion:
+\section{Data source properties}
+\label{source_properties}
+
+Data sources can have any number of the following characteristics:
+
+\begin{enumerate}
+  \item[RDF data] standard data structure
+  \item[Automatically discoverable] standard interface
+  \item[Control over data structure] custom data structure: consumers may
+    prefer certain structures, such as not too deeply nested or all records
+    under the key 'items' (as Exhibit does; see the sketch below)
+  \item[Retrievable] custom interface: data must be retrievable in a form that
+    matches the needs of the consumer
+  \item[Authentication] \ldots
+  \item[Authoring] create, update, delete
+  \item[Data as JSON over HTTP] \ldots
+  \item[Cross-domain interaction] JSONP or CORS
+\end{enumerate}
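+
+A small illustrative sketch of the 'control over data structure' point: the
+same records reshaped into the flat structure with all records under the key
+'items' that a consumer such as Exhibit expects (all names and values are made
+up).
+
+\begin{verbatim}
+import json
+
+# Illustrative only: reshape nested JSON into a flat {"items": [...]} form.
+nested = {
+    "people": [
+        {"name": "Alice", "contact": {"mbox": "mailto:alice@example.org"}},
+    ]
+}
+
+flat = {"items": [
+    {"label": person["name"], "mbox": person["contact"]["mbox"]}
+    for person in nested["people"]
+]}
+
+print(json.dumps(flat, indent=2))
+\end{verbatim}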
+
+\section{Publishing types and their properties}
+
+Data sources come in various types:
+
+\begin{enumerate}
+  \item SPARQL endpoint;
+  \item Linked Data;
+  \item Custom HTTP API;
+  \item Relational database.
+\end{enumerate}
+
+We can evaluate each of them based on the properties from section
+\ref{source_properties}.
+
+\ldots
+
+\section{Linked Data and custom API make assumptions about data model}
+\label{publisher_assumptions}
+
+The components that implement publishing as Linked Data or as a custom API will
+need hard-coded assumptions about the data model.
+
+\todo{We have to take into account the effort it takes for the application
+developer to publish data in a certain way\ldots hmm, where to put that
+discussion?}
+
+\section{Consumer types and their properties}
+
+We can distinguish the following types of consumers:
 
 \begin{enumerate}
   \item Crawler (integration provider).
     \end{enumerate}
 \end{enumerate}
 
-Each of these has different requirements and demands:
+We can evaluate each of them by which of the properties from section
+\ref{source_properties} they require.
 
-\begin{enumerate}
-  \item[RDF data] 1;
-  \item[Automatically discoverable] 1;
-  \item[Retrievable] Data must be retrievable in a form that matches the needs
-    of the consumer: 2;
-  \item[Control over data structure]: Consumers may prefer certain structures,
-    like not too nested or all records under key 'items' (like Exhibit does): 2;
-  \item[Authentication] 2;
-  \item[Authoring] 2;
-  \item[Data as JSON over HTTP] 2a;
-  \item[Cross-domain interaction] JSONP or CORS: 2a.
-\end{enumerate}
+\ldots
+
+\todo{Illustrate using examples such as geocoding scenario\ldots express
+uselessness of standard interfaces and data structures in case of application/ui
+consumer: we need to know the exact models that are in the data in order to be
+able to query meaningful chunks.}
+
+\section{Publishing type converters}
+
+\begin{itemize}
+  \item SPARQL wrappers (D2R, Semantic Bridge)
+  \item Linked Data from SPARQL endpoint (Pubby)
+\end{itemize}
+
+Wrappers have the same architectural position as adapters (see
+section \ref{structural_integration}), but they provide a full SPARQL interface:
+they must translate queries on the fly into corresponding requests to the
+wrapped data source and at the same time map the results to the RDF model (a toy
+sketch follows below). Complexity and limitations depend on the nature of the
+wrapped source:
+
+\begin{itemize}
+  \item Relational database: feasible
+  \item Web service: not feasible
+\end{itemize}
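+
+A toy sketch (far simpler than real wrappers such as D2R) of the kind of
+on-the-fly translation such a wrapper has to perform for a relational database;
+the table layout, the predicate-to-column mapping and the restriction to a
+single triple pattern are illustrative simplifications.
+
+\begin{verbatim}
+import sqlite3
+from rdflib import Graph, Literal, Namespace, URIRef
+
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+
+# Illustrative mapping: which table/column answers which predicate.
+PREDICATE_TO_COLUMN = {FOAF.mbox: ("people", "mbox")}
+
+def triples_for(conn, predicate):
+    # Translate the pattern "?s <predicate> ?o" into SQL on the fly
+    # and map the resulting rows back to RDF.
+    table, column = PREDICATE_TO_COLUMN[predicate]
+    result = Graph()
+    rows = conn.execute("SELECT id, %s FROM %s" % (column, table))
+    for row_id, value in rows:
+        subject = URIRef("http://example.org/%s/%s" % (table, row_id))
+        result.add((subject, predicate, Literal(value)))
+    return result
+\end{verbatim}
+
+A web service generally offers no comparable query-shaped interface to
+translate to, which is why the second case is not feasible.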
+
+\section{Publishing for optimal reusability}
+
+Published data is optimally reusable if the same publication can be consumed by
+all of the consumer types identified above in a workable fashion.
 
 \todo{Insert comparison table with results from sections
 \ref{publishing_sparql}, \ref{publishing_linkeddata} and
 \ref{publishing_customapi}.}
 
+\begin{itemize}
+  \item We have seen that neither SPARQL (see section \ref{publishing_sparql})
+    nor Linked Data (see section \ref{publishing_linkeddata}) is a feasible
+    approach for publishing data for the purpose of reuse by applications. We
+    need a dedicated API (see section \ref{publishing_customapi}) for that, like
+    the APIs that are commonly implemented in traditional web applications.
+  \item No authentication and no authoring in the case of SPARQL and Linked
+    Data; this makes them suitable only for public (shared) data.
+  \item We have also seen that custom APIs have the disadvantage that their data
+    will not be discovered by automated integration providers, as they will not
+    know how to talk to the API.
+  \item We conclude that in order to achieve maximum reusability of our
+    data, we will have to supply both a custom API and a SPARQL endpoint or
+    Linked Data.
+  \item This is unfortunate, because:
+    \begin{itemize}
+      \item it means the developer needs to do all the work of a traditional web
+        application, and then some; there is no benefit on the developer's side
+        here.
+      \item providing a data interface for a (local) user interface can be used
+        as an incentive to make developers contribute to the web of data; this
+        incentive is now lost.
+    \end{itemize}
+\end{itemize}
+
 \section{Publishing data via SPARQL endpoint}
 \label{publishing_sparql}
 
 In section \ref{publishing_requirements} we have seen which factors play a role
-in publishing data. Let us discuss an approach based on a SP ARQL endpoint and
+in publishing data. Let us discuss an approach based on a SPARQL endpoint and
 its impact on these factors.
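+
+For reference, a minimal sketch of what consuming such an endpoint looks like
+from the application side (the endpoint URL is a placeholder; SPARQLWrapper is
+just one possible client library).
+
+\begin{verbatim}
+from SPARQLWrapper import SPARQLWrapper, JSON
+
+endpoint = SPARQLWrapper("http://example.org/sparql")  # placeholder URL
+endpoint.setQuery("""
+    PREFIX foaf: <http://xmlns.com/foaf/0.1/>
+    SELECT ?person ?mbox WHERE { ?person foaf:mbox ?mbox } LIMIT 10
+""")
+endpoint.setReturnFormat(JSON)
+
+results = endpoint.query().convert()
+for binding in results["results"]["bindings"]:
+    print(binding["person"]["value"], binding["mbox"]["value"])
+\end{verbatim}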
 
 \subsubsection{The good}
   \item Our data is not (automatically) discoverable.
 \end{itemize}
 
-\section{There is no all-round publishing solution}
+\section{Consuming distributed data}
 
 \begin{itemize}
-  \item We have seen that neither SPARQL (see section \ref{publishing_sparql})
-    or Linked Data (see section \ref{publishing_linkeddata}) is a feasible
-    approach for publishing data for the purpose of reuse by applications. We
-    need a dedicated API (see section \ref{publishing_customapi}) for that, like
-    the APIs that are commonly implemented in traditional web applications.
-  \item No authentication and authoring in case of SPARQL and Linked Data; makes
-    it only suitable for public (shared) data.
-  \item We have also seen that custom APIs have the disadvantage that their data
-    will not be discovered by automated integration providers, as they will not
-    know how to talk to the API.
-  \item We conclude that in order to facilitate for maximum reusability of our
-    data, we will have to supply both a custom API and a SPARQL endpoint or
-    Linked Data.
-  \item This sucks, because:
+  \item query federation
+  \item local cache
+\end{itemize}
+
+\section{Consuming distributed data via query federation}
+\label{query_federation}
+
+A mediator component federates a query to an arbitrary number of external data
+sources (SPARQL endpoints).
+
+\begin{itemize}
+  \item Dependency on SPARQL endpoints, which is a problem -- see section
+    \ref{availability_of_rdf}.
+  \item Complex and no mature implementations
+  \item Limited semantical integration possibilities:
     \begin{itemize}
-      \item it means the developer needs to do all the work of a traditional web
-        application, and then some more. No benefit on the side of the developer
-        here!
-      \item providing a data interface for a (local) user interface can be used
-        as an incentive to make developers contribute to the web of data; this
-        incentive is now lost.
+      \item the full data set is not available, so we cannot calculate a full
+        closure. E.g. unification of identical resources cannot be done
+        optimally, because there is no guarantee that two sources containing
+        entities with the same identity always deliver both of them (or neither).
+      \item the mediator does not do semantic integration, so the query that is
+        sent to it must deal with the various data models itself. E.g. it must
+        explicitly ask for resources of type foaf:Agent as well as of type
+        vcard:VCard (see the sketch below).
     \end{itemize}
 \end{itemize}
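+
+A toy sketch of such a mediator (endpoint URLs are placeholders): the same
+query is sent to every endpoint and the bindings are simply concatenated. Note
+that the query itself has to enumerate the different data models, as argued
+above.
+
+\begin{verbatim}
+from SPARQLWrapper import SPARQLWrapper, JSON
+
+ENDPOINTS = ["http://example.org/sparql", "http://example.net/sparql"]
+
+QUERY = """
+PREFIX foaf:  <http://xmlns.com/foaf/0.1/>
+PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
+SELECT ?entity WHERE {
+  { ?entity a foaf:Agent } UNION { ?entity a vcard:VCard }
+}
+"""
+
+def federate(query):
+    # No semantic integration happens here: results are merely concatenated.
+    bindings = []
+    for url in ENDPOINTS:
+        client = SPARQLWrapper(url)
+        client.setQuery(query)
+        client.setReturnFormat(JSON)
+        bindings.extend(client.query().convert()["results"]["bindings"])
+    return bindings
+\end{verbatim}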
 
+\section{Consuming distributed data via caching locally}
+
+Copy all relevant data from the various sources of interest into one local
+store, then execute queries over it locally (a minimal sketch follows after the
+list below).
+
+\begin{itemize}
+  \item SPARQL is an MDBQL, but not really useful because:
+    \begin{itemize}
+      \item dependency on SPARQL endpoints;
+      \item potentially huge amounts of network transfers at the time of the
+        query (online caching): inefficient and unacceptable response times.
+    \end{itemize}
+  \item By adopting a custom cache population approach, we can solve both issues
+    from the previous point:
+    \begin{itemize}
+      \item no dependency on interfaces via adapters (see section
+        \ref{structural_integration});
+      \item offline caching by calculating the closure in a separate process,
+        only when it is needed (see section \ref{reasoning_performance}).
+    \end{itemize}
+  \item But there is a hard limit: when data sets get too large, it becomes
+    increasingly problematic to copy all data and do reasoning over it. A
+    potential way around this problem is to leverage an existing third-party
+    cache, but feasibility depends on:
+    \begin{itemize}
+      \item availability of a caching service;
+      \item trust in the quality of the data.
+    \end{itemize}
+  \item Data invalidation is/can be a hard problem.
+  \item Full semantical integration possibilities, as we can apply reasoning on
+    the full data set (that is: after the copy, before the query).
+\end{itemize}
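+
+A minimal sketch of the local-cache approach (source URLs are placeholders; the
+optional closure step assumes a reasoner package such as owlrl is available).
+
+\begin{verbatim}
+from rdflib import Graph
+
+SOURCES = ["http://example.org/alice.rdf", "http://example.net/contacts.rdf"]
+
+# Copy: fetch all relevant data into one local store.
+cache = Graph()
+for url in SOURCES:
+    cache.parse(url)
+
+# Offline step, run in a separate process only when needed
+# (see the section on reasoning performance), e.g. with owlrl:
+#   from owlrl import DeductiveClosure, RDFS_Semantics
+#   DeductiveClosure(RDFS_Semantics).expand(cache)
+
+# Queries are then executed locally, over the full (integrated) data set.
+query = "SELECT ?s ?mbox WHERE { ?s <http://xmlns.com/foaf/0.1/mbox> ?mbox }"
+for row in cache.query(query):
+    print(row.s, row.mbox)
+\end{verbatim}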
+
+
 \section{Integration providers can reduce complexity}
 
 \begin{itemize}
     independently (and by other people) and serve multiple applications.
 \end{itemize}
 
+\section{Data integration can be achieved in an elegant manner}
 
-\section{Consuming distributed data via query federation}
+\paragraph{Follows from}
 
-\ldots
+Sections \ref{structural_integration}, \ref{syntactical_integration} and
+\ref{semantical_integration}.
 
-\section{Consuming distributed data via caching locally}
+Data integration is not a responsibility of the code in the application logic,
+but can be defined in separate declarative statements whose implications can be
+derived by a generic component (a minimal sketch follows at the end of this
+section). Semantic integration is no longer a task of the application developer
+but of the data modeler.
 
-\ldots
-
-\section{Data integration is easy and elegant}
-
-Under the assumption that:
+But this only holds when all of the following are true:
 
 \begin{itemize}
   \item sources provide RDF data;
-  \item all data is cached in a single store locally.
+  \item all data that is involved in the integration is available in a single
+    store locally;
+  \item syntactical validation at the literal level is not required;
+  \item semantic integration is within the capabilities of the reasoning engine;
+  \item validation of compliance with specifications can be delegated to a
+    generic component.
 \end{itemize}
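+
+A minimal sketch of what such declarative integration logic can look like (the
+ex: vocabulary is made up for the example; the owlrl package stands in for the
+generic reasoning component).
+
+\begin{verbatim}
+from rdflib import Graph
+from owlrl import DeductiveClosure, RDFS_Semantics
+
+data = """
+@prefix ex:   <http://example.org/vocab#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+# Data from one source, in its own vocabulary:
+<http://example.org/alice> ex:emailAddress <mailto:alice@example.org> .
+
+# The integration logic, stated declaratively in the language of the data:
+ex:emailAddress rdfs:subPropertyOf foaf:mbox .
+"""
+
+g = Graph()
+g.parse(data=data, format="turtle")
+
+# A generic component derives the implications; no integration code lives
+# in the application logic itself.
+DeductiveClosure(RDFS_Semantics).expand(g)
+
+query = "SELECT ?mbox WHERE { ?s <http://xmlns.com/foaf/0.1/mbox> ?mbox }"
+print(list(g.query(query)))
+\end{verbatim}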
 
+
+\section{Separation between application logic and data model}
+\label{separation_of_concerns}
+
+\begin{itemize}
+  \item the integration service (see the sections on integration) and the user
+    interface (see sections \ref{ui_domain_dependence} and
+    \ref{publisher_assumptions}) require descriptive specifications of (part of)
+    the data model, which makes for an asymmetric separation between application
+    logic and data model: the application depends on the data, but the data does
+    not depend on the application.
+  \item semantic integration is possible without the help of any dedicated
+    integration procedures in the application logic. Instead, general-purpose
+    components can be leveraged (a reasoning engine), and integration logic can
+    be defined declaratively and in the same language as the data (RDF). As
+    such, integration semantics are part of the data model and therefore the
+    task of the data modeler instead of the application developer; i.e. a
+    separation of concerns between application logic (developer) and data model
+    (modeler).
+\end{itemize}
+
+\section{Internal models are (relatively) flexible}
+\label{flexible_model}
+
+\todo{Entailed by and defined in terms of section \ref{separation_of_concerns}.}
+
 \begin{thebibliography}{9}
 
 \bibitem{Kinsella08}
-\contentsline {chapter}{\numberline {1}Atomic findings}{2}
-\contentsline {section}{\numberline {1.1}Our aggregated address book is a Semantic Web\xspace application}{2}
-\contentsline {section}{\numberline {1.2}Availability of RDF data sources is a problem}{2}
-\contentsline {section}{\numberline {1.3}Modeling in RDF is an expert-task}{3}
-\contentsline {section}{\numberline {1.4}Data integration is a problem at three levels}{3}
-\contentsline {section}{\numberline {1.5}Structural integration}{4}
-\contentsline {section}{\numberline {1.6}Syntactical integration}{4}
-\contentsline {section}{\numberline {1.7}Semantical integration}{5}
-\contentsline {section}{\numberline {1.8}Automated reasoning depends on expressivity of underlying standards}{7}
-\contentsline {section}{\numberline {1.9}Automated reasoning performance is a tricky topic}{7}
-\contentsline {section}{\numberline {1.10}SPARQL (potentially) requires a lot of queries to fetch a single data set}{7}
-\contentsline {section}{\numberline {1.11}User interfaces can fuse interface elements and data on server or on client}{8}
-\contentsline {section}{\numberline {1.12}Object-triple mapping to make RDF data compatible with existing web application frameworks}{8}
-\contentsline {section}{\numberline {1.13}Requirements for publishing optimally reusable data}{9}
-\contentsline {section}{\numberline {1.14}Publishing data via SPARQL endpoint}{10}
-\contentsline {subsubsection}{The good}{10}
-\contentsline {subsubsection}{The bad}{10}
-\contentsline {section}{\numberline {1.15}Publishing data via Linked Data}{11}
-\contentsline {subsubsection}{The good}{11}
-\contentsline {subsubsection}{The bad}{12}
-\contentsline {section}{\numberline {1.16}Publishing data via custom API}{12}
-\contentsline {subsubsection}{The good}{12}
-\contentsline {subsubsection}{The bad}{13}
-\contentsline {section}{\numberline {1.17}There is no all-round publishing solution}{13}
-\contentsline {section}{\numberline {1.18}Integration providers can reduce complexity}{14}
+\contentsline {chapter}{\numberline {1}Atomic findings}{3}
+\contentsline {section}{\numberline {1.1}Our aggregated address book is a Semantic Web\xspace application}{3}
+\contentsline {section}{\numberline {1.2}Availability of RDF data sources is a problem}{3}
+\contentsline {section}{\numberline {1.3}Modeling in RDF is an expert-task}{4}
+\contentsline {section}{\numberline {1.4}Data integration is a problem at three levels}{4}
+\contentsline {section}{\numberline {1.5}Structural integration}{5}
+\contentsline {section}{\numberline {1.6}Syntactical integration}{5}
+\contentsline {section}{\numberline {1.7}Semantical integration}{6}
+\contentsline {section}{\numberline {1.8}Automated reasoning depends on expressivity of underlying standards}{8}
+\contentsline {section}{\numberline {1.9}Automated reasoning performance is a tricky topic}{8}
+\contentsline {section}{\numberline {1.10}SPARQL (potentially) requires a lot of queries to fetch a single data set}{8}
+\contentsline {section}{\numberline {1.11}User interfaces can fuse interface elements and data on server or on client}{9}
+\contentsline {section}{\numberline {1.12}User interfaces can be domain-independent or domain-dependent}{9}
+\contentsline {section}{\numberline {1.13}Object-triple mapping to make RDF data compatible with existing web application frameworks}{10}
+\contentsline {section}{\numberline {1.14}Data source properties}{10}
+\contentsline {section}{\numberline {1.15}Publishing types and their properties}{11}
+\contentsline {section}{\numberline {1.16}Linked Data and custom API make assumptions about data model}{11}
+\contentsline {section}{\numberline {1.17}Consumer types and their properties}{12}
+\contentsline {section}{\numberline {1.18}Publishing type converters}{12}
+\contentsline {section}{\numberline {1.19}Publishing for optimal reusability}{12}
+\contentsline {section}{\numberline {1.20}Publishing data via SPARQL endpoint}{13}
+\contentsline {subsubsection}{The good}{13}
+\contentsline {subsubsection}{The bad}{14}
+\contentsline {section}{\numberline {1.21}Publishing data via Linked Data}{15}
+\contentsline {subsubsection}{The good}{15}
+\contentsline {subsubsection}{The bad}{15}
+\contentsline {section}{\numberline {1.22}Publishing data via custom API}{16}
+\contentsline {subsubsection}{The good}{16}
+\contentsline {subsubsection}{The bad}{16}
+\contentsline {section}{\numberline {1.23}Consuming distributed data}{17}
+\contentsline {section}{\numberline {1.24}Consuming distributed data via query federation}{17}
+\contentsline {section}{\numberline {1.25}Consuming distributed data via caching locally}{17}
+\contentsline {section}{\numberline {1.26}Integration providers can reduce complexity}{18}
+\contentsline {section}{\numberline {1.27}Data integration can be achieved in an elegant manner}{19}
+\contentsline {paragraph}{Follows from}{19}
+\contentsline {section}{\numberline {1.28}Separation between application logic and data model}{19}
+\contentsline {section}{\numberline {1.29}Internal models are (relatively) flexible}{20}