001/* A class to create the provenance schemas. 002 003Copyright (c) 2009-2010 The Regents of the University of California. 004All rights reserved. 005Permission is hereby granted, without written agreement and without 006license or royalty fees, to use, copy, modify, and distribute this 007software and its documentation for any purpose, provided that the above 008copyright notice and the following two paragraphs appear in all copies 009of this software. 010 011IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 012FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 013ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 014THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 015SUCH DAMAGE. 016 017THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 018INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 019MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 020PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 021CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 022ENHANCEMENTS, OR MODIFICATIONS. 023 024*/ 025 026package org.kepler.provenance.sql; 027 028import java.sql.SQLException; 029 030import org.kepler.util.sql.Column; 031import org.kepler.util.sql.DatabaseType; 032import org.kepler.util.sql.Schema; 033import org.kepler.util.sql.Table; 034 035 036/** A class to create the provenance schemas. 037 * 038 * @author Daniel Crawl 039 * @version $Id: Schemas.java 34596 2017-08-10 23:33:56Z crawl $ 040 * 041 */ 042 043public class Schemas 044{ 045 /** This class cannot be instantiated. */ 046 private Schemas() { } 047 048 /** Get the v6 provenance schema. */ 049 public static Schema createSchemaV6() 050 { 051 Schema retval = new Schema(6); 052 053 // the action table 054 Table actionTable = retval.createTable("action"); 055 actionTable.putColumn("id", Column.AUTOINCID); 056 actionTable.putColumn("parent_id", Column.INTEGER); 057 actionTable.putColumn("time", Column.TIMESTAMP); 058 actionTable.putColumn("user", Column.TEXT); 059 060 // the actor table 061 Table actorTable = retval.createTable("actor"); 062 actorTable.putColumn("id", Column.PK_INTEGER, "entity", "id"); 063 actorTable.putColumn("class", Column.TEXT); 064 065 // the actor_fire table 066 Table actorFireTable = retval.createTable("actor_fire"); 067 actorFireTable.putColumn("actor_id", Column.INTEGER, "actor", "id"); 068 actorFireTable.putColumn("end_time", Column.TIMESTAMP); 069 actorFireTable.putColumn("id", Column.AUTOINCID); 070 actorFireTable.putColumn("start_time", Column.TIMESTAMP); 071 actorFireTable.putColumn("type", Column.TEXT); 072 actorFireTable.putColumn("wf_exec_id", Column.INTEGER, "workflow_exec", 073 "id"); 074 075 // the director table 076 Table directorTable = retval.createTable("director"); 077 directorTable.putColumn("class", Column.TEXT); 078 directorTable.putColumn("id", Column.PK_INTEGER, "entity", "id"); 079 080 // the entity table 081 Table entityTable = retval.createTable("entity"); 082 entityTable.putColumn("container_id", Column.INTEGER); 083 entityTable.putColumn("id", Column.AUTOINCID); 084 entityTable.putColumn("name", Column.TEXT); 085 entityTable.putColumn("type", Column.TEXT); 086 entityTable.putColumn("workflow_id", Column.INTEGER, "workflow", "id"); 087 088 // the link table 089 Table linkTable = retval.createTable("link"); 090 linkTable.putColumn("end_point_1", Column.INTEGER); 091 linkTable.putColumn("end_point_2", Column.INTEGER); 092 linkTable.putColumn("id", Column.PK_INTEGER, "entity", "id"); 093 094 // the parameter table 095 Table parameterTable = retval.createTable("parameter"); 096 parameterTable.putColumn("id", Column.PK_INTEGER, "entity", "id"); 097 parameterTable.putColumn("type", Column.TEXT); 098 // value is allowed to be null since oracle treats "" as null 099 parameterTable.putColumn("value", Column.NULLABLE_TEXT); 100 101 // the port table 102 Table portTable = retval.createTable("port"); 103 portTable.putColumn("direction", Column.INTEGER); 104 portTable.putColumn("id", Column.PK_INTEGER, "entity", "id"); 105 portTable.putColumn("multiport", Column.BOOLEAN); 106 portTable.putColumn("type", Column.TEXT); 107 108 // the relation table 109 Table relationTable = retval.createTable("relation"); 110 relationTable.putColumn("id", Column.PK_INTEGER, "entity", "id"); 111 relationTable.putColumn("width", Column.INTEGER); 112 113 // the token_flow table 114 Table tokenFlowTable = retval.createTable("token_flow"); 115 tokenFlowTable.putColumn("channel", Column.INTEGER); 116 tokenFlowTable.putColumn("data", Column.TEXT); 117 tokenFlowTable.putColumn("data_description", Column.INTEGER); 118 tokenFlowTable.putColumn("fire_id", Column.INTEGER, "actor_fire", "id"); 119 tokenFlowTable.putColumn("id", Column.AUTOINCID); 120 tokenFlowTable.putColumn("is_read", Column.BOOLEAN); 121 tokenFlowTable.putColumn("port_id", Column.INTEGER, "port", "id"); 122 tokenFlowTable.putColumn("rw_fire_id", Column.INTEGER, "actor_fire", 123 "id"); 124 tokenFlowTable.putColumn("time", Column.TIMESTAMP); 125 126 // the workflow table 127 Table workflowTable = retval.createTable("workflow"); 128 workflowTable.putColumn("action_id", Column.INTEGER, "action", "id"); 129 workflowTable.putColumn("id", Column.AUTOINCID); 130 workflowTable.putColumn("name", Column.TEXT); 131 workflowTable.putColumn("version", Column.TEXT); 132 133 // the workflow exec table 134 Table workflowExecTable = retval.createTable("workflow_exec"); 135 workflowExecTable.putColumn("id", Column.AUTOINCID); 136 workflowExecTable.putColumn("end_time", Column.TIMESTAMP); 137 workflowExecTable.putColumn("start_time", Column.TIMESTAMP); 138 workflowExecTable.putColumn("user", Column.TEXT); 139 workflowExecTable.putColumn("workflow_id", Column.INTEGER, "workflow", 140 "id"); 141 142 return retval; 143 } 144 145 /** Get the v7 provenance schema. */ 146 public static Schema createSchemaV7() 147 { 148 Schema retval = createSchemaV6(); 149 retval.setMajorVersion(7); 150 151 // remove the action table 152 retval.removeTable("action"); 153 154 // the entity table 155 Table entityTable = retval.getTable("entity"); 156 entityTable.putColumn("deleted", new Column(Column.Type.Boolean, "FALSE")); 157 entityTable.putColumn("prev_id", Column.INTEGER); 158 entityTable.putColumn("wf_change_id", Column.INTEGER, 159 "workflow_change", "id"); 160 entityTable.removeColumn("workflow_id"); 161 entityTable.putColumn("wf_id", Column.INTEGER, "workflow", "id"); 162 163 // update the workflow table 164 Table workflowTable = retval.getTable("workflow"); 165 workflowTable.removeColumn("action_id"); 166 workflowTable.removeColumn("version"); 167 168 // add the workflow change table 169 Table workflowChangeTable = retval.createTable("workflow_change"); 170 workflowChangeTable.putColumn("id", Column.AUTOINCID); 171 workflowChangeTable.putColumn("time", Column.TIMESTAMP); 172 workflowChangeTable.putColumn("user", Column.TEXT); 173 workflowChangeTable.putColumn("wf_id", Column.INTEGER, "workflow", 174 "id"); 175 176 // update the workflow exec table 177 Table workflowExecTable = retval.getTable("workflow_exec"); 178 workflowExecTable.removeColumn("workflow_id"); 179 workflowExecTable.putColumn("wf_id", Column.INTEGER, "workflow", "id"); 180 181 return retval; 182 } 183 184 /** Get the newest v8 provenance schema. */ 185 public static Schema createSchemaV8() 186 { 187 return createSchemaV8(-1); 188 } 189 190 /** Get the V8 provenance schema at a specific minor version. 191 * @param minorVersion the minor version of V8 to return. If -1, 192 * return the latest V8. 193 */ 194 public static Schema createSchemaV8(int minorVersion) 195 { 196 Schema retval = createSchemaV7(); 197 retval.setMajorVersion(8); 198 199 // remove type from actor_fire 200 Table actorFireTable = retval.getTable("actor_fire"); 201 actorFireTable.removeColumn("type"); 202 203 // add associated file table 204 Table assocDataTable = retval.createTable("associated_data"); 205 assocDataTable.putColumn("data_id", Column.MD5_TEXT, "data", "md5"); 206 assocDataTable.putColumn("id", Column.AUTOINCID); 207 assocDataTable.putColumn("name", Column.TEXT); 208 assocDataTable.putColumn("val", Column.TEXT); 209 assocDataTable.putColumn("wf_exec_id", Column.INTEGER, "workflow_exec", "id"); 210 211 // add the data table 212 Table dataTable = retval.createTable("data"); 213 dataTable.putColumn("contents", Column.BLOB); 214 dataTable.putColumn("md5", Column.PK_MD5_TEXT); 215 dataTable.putColumn("truncated", Column.BOOLEAN); 216 217 // add the error table 218 Table errorTable = retval.createTable("error"); 219 // we don't always know the source, so this can be null 220 errorTable.putColumn("entity_id", Column.NULLABLE_INTEGER); 221 errorTable.putColumn("id", Column.AUTOINCID); 222 errorTable.putColumn("exec_id", Column.INTEGER, "workflow_exec", "id"); 223 // text can be null since there may be no message in the 224 // exception. 225 errorTable.putColumn("message", Column.NULLABLE_TEXT); 226 227 Table entityTable = retval.getTable("entity"); 228 // NOTE: these columns can be empty so must be nullable 229 // for oracle. 230 entityTable.putColumn("display", Column.NULLABLE_TEXT); 231 entityTable.putColumn("name", Column.NULLABLE_TEXT); 232 entityTable.removeColumn("container_id"); 233 234 // remove unused link table 235 retval.removeTable("link"); 236 237 // the parameter exec table 238 Table parameterExecTable = retval.createTable("parameter_exec"); 239 parameterExecTable.putColumn("parameter_id", Column.INTEGER, "parameter", "id"); 240 parameterExecTable.putColumn("wf_exec_id", Column.INTEGER, "workflow_exec", "id"); 241 242 // the port table 243 Table portTable = retval.getTable("port"); 244 // remove type since port_event.type records token type. 245 portTable.removeColumn("type"); 246 247 // the port event table 248 Table portEventTable = retval.createTable("port_event"); 249 portEventTable.putColumn("channel", Column.INTEGER); 250 portEventTable.putColumn("data_id", Column.MD5_TEXT, "data", "md5"); 251 portEventTable.putColumn("fire_id", Column.INTEGER, "actor_fire", "id"); 252 portEventTable.putColumn("file_id", Column.NULLABLE_MD5_TEXT); 253 portEventTable.putColumn("id", Column.AUTOINCID); 254 portEventTable.putColumn("port_id", Column.INTEGER, "port", "id"); 255 portEventTable.putColumn("time", Column.TIMESTAMP); 256 portEventTable.putColumn("type", Column.TEXT); 257 portEventTable.putColumn("write_event_id", Column.INTEGER); 258 259 // remove unused relation table 260 retval.removeTable("relation"); 261 262 // add table for tags 263 Table tagTable = retval.createTable("tag"); 264 tagTable.putColumn("id", Column.AUTOINCID); 265 tagTable.putColumn("searchstring", Column.TEXT); 266 tagTable.putColumn("type", Column.TEXT); 267 tagTable.putColumn("urn", Column.TEXT); 268 tagTable.putColumn("wf_exec_id", Column.INTEGER, "workflow_exec", "id"); 269 270 // remove the token_flow table 271 retval.removeTable("token_flow"); 272 273 // update the workflow table 274 Table workflowTable = retval.getTable("workflow"); 275 // workflow id is now lsid 276 workflowTable.putColumn("lsid", Column.TEXT); 277 // NOTE: the name column can be empty so must be nullable 278 // for oracle. 279 workflowTable.putColumn("name", Column.NULLABLE_TEXT); 280 281 // update the workflow change table 282 Table workflowChangeTable = retval.getTable("workflow_change"); 283 workflowChangeTable.putColumn("host_id", Column.TEXT); 284 285 // update the workflow_exec table 286 Table workflowExecTable = retval.getTable("workflow_exec"); 287 workflowExecTable.putColumn("annotation", Column.NULLABLE_TEXT); 288 workflowExecTable.putColumn("host_id", Column.TEXT); 289 workflowExecTable.putColumn("lsid", Column.TEXT); 290 workflowExecTable.putColumn("wf_contents_id", Column.MD5_TEXT, "data", 291 "md5"); 292 workflowExecTable.putColumn("wf_full_lsid", Column.TEXT); 293 workflowExecTable.putColumn("derived_from", Column.NULLABLE_TEXT); 294 295 if(minorVersion == 0) 296 { 297 return retval; 298 } 299 300 // below this line is 8.1 301 retval.setMinorVersion(1); 302 303 // NOTE: the default value for module_dependencies is "core" since 304 // that was the value used in the kepler 2.0 release. 305 306 workflowExecTable.putColumn("module_dependencies", new Column(Column.Type.Varchar, 2000), "core"); 307 workflowExecTable.putColumn("type", Column.TEXT, "unknown"); 308 309 if(minorVersion == 1) 310 { 311 return retval; 312 } 313 314 // below this line is 8.2 315 retval.setMinorVersion(2); 316 317 // module_dependencies can be large so change the type to unlimited length 318 // NOTE: cannot set default value for TEXT_UNLIMITED 319 workflowExecTable.putColumn("module_dependencies", Column.TEXT_UNLIMITED); 320 321 // the actor_state table 322 Table actorStateTable = retval.createTable("actor_state"); 323 actorStateTable.putColumn("fire_id", Column.PK_INTEGER, "actor_fire", "id"); 324 actorStateTable.putColumn("state", Column.BLOB); 325 326 // add indexes to entity table 327 entityTable.putIndex("entity_query_idx", "name, type, wf_change_id"); 328 329 if(minorVersion == 2) 330 { 331 return retval; 332 } 333 334 // below this line is 8.3 335 retval.setMinorVersion(3); 336 337 // add an index to portEvent.write_event_id to speed up queries 338 // used by RecordPlayer 339 portEventTable.putIndex("write_event_id_idx", "write_event_id"); 340 341 // allow port_event.data_id to have null values 342 portEventTable.putColumn("data_id", Column.NULLABLE_MD5_TEXT, "data", "md5"); 343 344 // allow port_event.type to have null values 345 portEventTable.putColumn("type", Column.NULLABLE_TEXT); 346 347 // create port_event.data 348 portEventTable.putColumn("data", 349 new Column(Column.Type.Varchar, SQLRecordingV8.MAX_PORT_EVENT_DATA_LENGTH, true)); 350 351 352 if(minorVersion == 3) { 353 return retval; 354 } 355 356 // below this line is 8.4 357 retval.setMinorVersion(4); 358 359 // allow associated_data.data_id to have null values. 360 assocDataTable.putColumn("data_id", Column.NULLABLE_MD5_TEXT, "data", "md5"); 361 362 if(minorVersion == 4) { 363 return retval; 364 } 365 366 return retval; 367 } 368 369 /** Verify the schema version in the database matches the version 370 * of the schema parameter. If the version table is not found, 371 * attempt to guess the version based on the schema structure. 372 * If the database appears empty, create the tables in the schema. 373 * If the database schema version is older than the schema 374 * parameter's version, asks if an upgrade is performed. If the 375 * database schema is newer, throws an exception. 376 */ 377 public synchronized static void checkVersion(DatabaseType dbType, Schema schema) throws SQLException 378 { 379 String dbVersionStr; 380 381 // attempt to get the version in the database 382 Integer dbMajorVersion = dbType.getMajorVersion(); 383 Integer dbMinorVersion = dbType.getMinorVersion(); 384 385 // see if version table did not exist 386 if(dbMajorVersion == null) 387 { 388 // guess the version 389 390 // version 6 had the action table 391 if(dbType.tableExists(dbType.getTableName("action"))) 392 { 393 dbType.createVersionTable(6, 0); 394 dbVersionStr = "6.0"; 395 } 396 // XXX we assume v8 instead of checking for v7. 397 else if(dbType.tableExists(dbType.getTableName("data"))) 398 { 399 dbType.createVersionTable(8, 0); 400 dbVersionStr = "8.0"; 401 } 402 else 403 { 404 // create tables 405 dbType.createTables(schema); 406 dbVersionStr = schema.getVersionString(); 407 } 408 } 409 // see if the minor version did not exist. 410 else if(dbMinorVersion == null) 411 { 412 // create the minor version information 413 dbType.addMinorVersion(0); 414 dbVersionStr = dbMajorVersion + ".0"; 415 } 416 else 417 { 418 dbVersionStr = dbMajorVersion + "." + dbMinorVersion; 419 } 420 421 // see if the database is older than the schema 422 if(dbType.hasOlderSchema(schema)) 423 { 424 if(!Upgrade.askAndUpgrade(dbType, schema)) 425 { 426 throw new SQLException("User chose not to upgrade schema."); 427 } 428 } 429 // see if the database if newer than the schema 430 else if(dbType.hasNewerSchema(schema)) 431 { 432 throw new SQLException("Database schema (version " + 433 dbVersionStr + ") is too new. You must use either an older or empty database."); 434 } 435 } 436}