From 7a6a126fe4c7718e2d04cffdc91a333cbe1d01ce Mon Sep 17 00:00:00 2001 From: Jeremiah Peschka Date: Sat, 25 Jan 2014 06:58:54 -0800 Subject: [PATCH 01/11] Added support for LinkTypes and new User field. * `Users` now has an `AccountId` field * Added `PostLinks` table and `LinkType` table * Cleaned up readmes * Commented out support for MySQL and SQLite * Removed references to MySQL and SQLite from documentation Closes #1 --- soddi/App.config | 4 +-- soddi/Loaders/Scripts/mssql.sql | 24 ++++++++++++++++- soddi/README.txt | 45 +++++++++---------------------- soddi/ReadMe.htm | 48 +++++++++++---------------------- soddi/TableTypes/PostLinks.cs | 25 +++++++++++++++++ soddi/TableTypes/Users.cs | 13 +-------- soddi/soddi.csproj | 1 + 7 files changed, 80 insertions(+), 80 deletions(-) create mode 100644 soddi/TableTypes/PostLinks.cs diff --git a/soddi/App.config b/soddi/App.config index 9cf9933..8adef64 100644 --- a/soddi/App.config +++ b/soddi/App.config @@ -9,8 +9,8 @@ - - + diff --git a/soddi/Loaders/Scripts/mssql.sql b/soddi/Loaders/Scripts/mssql.sql index 955deb1..73c7b65 100644 --- a/soddi/Loaders/Scripts/mssql.sql +++ b/soddi/Loaders/Scripts/mssql.sql @@ -16,11 +16,21 @@ IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'DUMMY.[Votes DROP TABLE DUMMY.[Votes] IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'DUMMY.[VoteTypes]') AND type in (N'U')) DROP TABLE DUMMY.[VoteTypes] +IF EXISTS (SELECT * FROM sys.objects WHERE OBJECT_ID = OBJECT_ID(N'DUMMY.[PostLinks]') AND type IN (N'U')) +DROP TABLE DUMMY.[PostLinks] +IF EXISTS (SELECT * FROM sys.objects WHERE OBJECT_ID = OBJECT_ID(N'DUMMY.[LinkTypes]') AND type IN (N'U')) +DROP TABLE DUMMY.[LinkTypes] SET ansi_nulls ON SET quoted_identifier ON SET ansi_padding ON +CREATE TABLE DUMMY.[LinkTypes] ( + Id INT NOT NULL, + [Type] VARCHAR(40) NOT NULL, + CONSTRAINT PK_LinkTypes PRIMARY KEY CLUSTERED (Id ASC) +); + CREATE TABLE DUMMY.[VoteTypes] ( [Id] [INT] NOT NULL, [Name] [VARCHAR](40) NOT NULL @@ -63,6 +73,8 @@ INSERT DUMMY.[VoteTypes] ([Id], [Name]) VALUES(12,N'Spam') INSERT DUMMY.[VoteTypes] ([Id], [Name]) VALUES(13,N'InformModerator') INSERT DUMMY.[PostTypes] ([Id], [Type]) VALUES(1, N'Question') INSERT DUMMY.[PostTypes] ([Id], [Type]) VALUES(2, N'Answer') +INSERT DUMMY.[LinkTypes] ([Id], [Type]) VALUES(1, N'Linked') +INSERT DUMMY.[LinkTypes] ([Id], [Type]) VALUES(3, N'Duplicate') IF 0 = 1--FULLTEXT BEGIN @@ -120,7 +132,8 @@ CREATE TABLE DUMMY.[Users] ( [Reputation] [INT] NOT NULL, [UpVotes] [INT] NOT NULL, [Views] [INT] NOT NULL, - [WebsiteUrl] [NVARCHAR](200) NULL + [WebsiteUrl] [NVARCHAR](200) NULL, + [AccountId] [INT] NULL , CONSTRAINT [PK_Users] PRIMARY KEY CLUSTERED ( [Id] ASC ) ON [PRIMARY] ) ON [PRIMARY] @@ -251,6 +264,15 @@ CREATE TABLE DUMMY.[Badges] ( , CONSTRAINT [PK_Badges] PRIMARY KEY CLUSTERED ( [Id] ASC ) ON [PRIMARY] ) ON [PRIMARY] +CREATE TABLE DUMMY.[PostLinks] ( + Id INT NOT NULL, + CreationDate DATETIME NOT NULL, + PostId INT NOT NULL, + RelatedPostId INT NOT NULL, + LinkTypeId TINYINT NOT NULL, + CONSTRAINT [PK_PostLinks] PRIMARY KEY CLUSTERED ([Id] ASC) +) + IF 0 = 1-- INDICES BEGIN CREATE NONCLUSTERED INDEX [IX_Badges_Id_UserId] ON DUMMY.[Badges] ( diff --git a/soddi/README.txt b/soddi/README.txt index 69d09e7..7101dd7 100644 --- a/soddi/README.txt +++ b/soddi/README.txt @@ -1,27 +1,31 @@  -StackOverflow Data Dump Import v.11 +StackOverflow Data Dump Import v.1.1 - ClickOnce Installer: http://skysanders.net/tools/se/soddi/publish.htm + Portions of code (c) 2014 Jeremiah Peschka + http://github.com/peschkaj/soddi + binaries available at: https://github.com/peschkaj/soddi/releases + + StackOverflow data dump available at: https://archive.org/details/stackexchange - (c) 2010 Sky Sanders + + Original code (c) 2010 Sky Sanders licensed under MIT/GPL - see license.txt - + + ClickOnce Installer: http://skysanders.net/tools/se/soddi/publish.htm + info:http://skysanders.net/tools/se msi :http://skysanders.net/files/soddi.11.msi bin :http://skysanders.net/files/soddi.11.zip src :http://bitbucket.org/bitpusher/soddi/ + SODDI is a .Net 3.5 sp1 executable written in C# that quickly and cleanly imports StackOverflow Data Dump XML files into -MS Sql Server 2000/05/08, MySql Server 5.1 and SQLite3. (MySql and SQLite drivers are included) +MS Sql Server 2005/08/12 (please stop actively using SQL Server 2005). SODDI can be run as a command line utility or, when invoked with no arguments or GUI argument, will present a Windows Form interface. -Quick Start: -The quickest route to your own copy of the StackOverflow databases is to use the ClickOnce installer, -browse to the uncompressed data dump, accept the default SQLite provider selection and click 'Import'. - USAGE: @@ -41,14 +45,6 @@ TARGET A valid ADO.Net connection string, including the provider invari schema named as the site data being imported. e.g. so.Users, meta.Users. The tables are dropped before import. - MySql: Connection string should include server, each site's data will be loaded - into a database named as the site imported. The databases will be dropped and - recreated before import. - - SQLite: Connection string should specify a directory. The data will be imported - into seperate .db3 files, each named as the site imported. Existing data files - will be overwritten. - The target database/datafile/schema names can be modified by explicitely specifying sites to import and appending the desired schema as a parameter value or editing the Sites list item schema in the GUI. @@ -80,12 +76,6 @@ META|SO|SU|SF Specifies which sites to import. If none are specified, all site Sql Server - SO:StackOverflowData will load the data from the XXXXX SO directory into the database specified in the connection string and the schema 'StackOverflowData' - MySql - SO:StackOverflowData will load the data from the XXXXX SO directory - into a new database named StackOverflowData on the server specified in the connection string. - - SQLite - SO:StackOverflowData will load the data from the XXXXX SO directory into a new - db3 file named StackOverflowData.db3 in the directory specified in the connection string. - In GUI mode you may edit the schema item in the Sites list. Options are not case sensitive. @@ -95,15 +85,6 @@ Example command lines. GUI Mode: soddi -SQLite - all sites: - soddi source:"F:\Export-030110" target:"data source=c:\temp;version=3;new=True;Provider=System.Data.SQLite" - -MySql - all sites: - soddi source:"F:\Export-030110" target:"server=localhost;user id=root;password=p@ssW0rd;Provider=MySql.Data.MySqlClient" - -MySql - Meta StackOverflow and StackOverflow data into specified databases: - soddi source:"F:\Export-030110" target:"server=localhost;user id=root;password=p@ssW0rd;Provider=MySql.Data.MySqlClient" meta:MetaDb so:SoDb - Sql Server - all sites: soddi source:"F:\Export-030110" target:"data source=(local);initial catalog=SOData;integrated security=true;Provider=System.Data.SqlClient" diff --git a/soddi/ReadMe.htm b/soddi/ReadMe.htm index c0f77bb..2e3df99 100644 --- a/soddi/ReadMe.htm +++ b/soddi/ReadMe.htm @@ -1,7 +1,7 @@  - StackOverflow Data Dump Import v.09 + StackOverflow Data Dump Import v.1.1