Populate any postgres table with random values using this script!
Just this morning, I had this itch to create a script that can populate any table in my database without me having to check what the column names were. My work has become extremely exhausting that the thought of creating dummy values for tables (especially ones with more than 5 columns) became a very daunting task. So here I am presenting the very script I made.
The Aim
Okay, so the aim is very simple, the next time I have to create dummy values for my database, I shouldn't have to think about it. The only thing I should do is run the script and boom! It should be there! If you'd like to skip my ramblings below, you're free to download the script here!
The Tools
For this script, I will be making use of Groovy version 2.4.12 or higher and Java version 11.
The Script
First, I visualized what the inputs will be. Ideally it should be something easy to remember, something like this:
groovy populate-table -dbName=peopledb -dbUser=postgres -dbPassword=password -table=person -rows=10
With this in mind, I started getting the inputs from the terminal, but how exactly do we do this easily? To expedite this process, I made use of groovy-cli-picocli. This library allows us to easily parse and get the values from the terminal.
@Grab("org.codehaus.groovy:groovy-cli-picocli:2.5.15")
private static CliBuilder buildCli() {
String usage = """
Usage:
This script can populate any table in postgres with random values.
Sample usage:
groovy populate-table -dbName=peopledb -dbUser=postgres -dbPassword=password -table=person -rows=10
"""
CliBuilder cliBuilder = new CliBuilder(
name: "populateTable",
width: 1000,
usage: usage
)
cliBuilder.dbName(type: String, argName: "dbName", required: true, "The name of the database.")
cliBuilder.dbUser(type: String, argName: "dbUser", required: true, "The username of the user that can access the database.")
cliBuilder.dbPassword(type: String, argName: "dbPassword", required: true, "The password of the user.")
cliBuilder.table(type: String, argName: "table", required: true, "The name of the table.")
cliBuilder.rows(type: Integer, argName: "rows", required: true, "The number of rows to populate.")
cliBuilder.help(longOpt: "h", "Prints usage.")
return cliBuilder
}
After taking care of the arguments, next we will need to connect to the database. Here's a private method that establishes a connection to the database:
@Grab("org.postgresql:postgresql:42.2.8")
private static Connection initDatabase(String dbName, String dbUser, String dbPassword) {
String driverClassName = "org.postgresql.Driver"
String dbUrl = "jdbc:postgresql://localhost:5432/${dbName}"
Class.forName(driverClassName)
return DriverManager.getConnection(dbUrl, dbUser, dbPassword)
}
Next, we will need to get the column names and we have to figure out what data types they are. Of course I know this wouldn't be perfect especially if we are using enums, but we can always customize that later on I guess.
private static List<ColumnInfo> getAllColumnNamesOfTable(Connection sqlInstance, String tableName) {
String query = "SELECT * FROM ${tableName} limit 1;"
Statement statement = sqlInstance.createStatement()
ResultSet resultSet = statement.executeQuery(query)
ResultSetMetaData resultSetMetaData = resultSet.getMetaData()
int columnCount = resultSetMetaData.getColumnCount()
List<ColumnInfo> columnNames = new ArrayList<>()
(1..columnCount).each {
String name = resultSetMetaData.getColumnName(it)
String type = resultSetMetaData.getColumnClassName(it)
columnNames.add(new ColumnInfo(name, type))
}
return columnNames
}
public class ColumnInfo {
private String name
private String dataType
public ColumnInfo(String name, String dataType) {
this.name = name
this.dataType = dataType
}
public String getName() {
return name
}
public String getDataType() {
return dataType
}
}
After getting the details of our column, it's time to generate some insert queries. My script is surely missing other datatypes, but I'll get it working as I go!
private List<String> generateQueries(String table, int numOfRowsToPopulate, List<ColumnInfo> columnInfoList) {
List<String> queries = []
(0..numOfRowsToPopulate).each { int rowNumber ->
List<String> values = []
columnInfoList.each { ColumnInfo columnInfo ->
values.add(generateRandomValueForColumnInfo(columnInfo, rowNumber))
}
String query = "INSERT INTO ${table} values (${values.join(",")});"
queries.add(query)
}
return queries
}
private String generateRandomValueForColumnInfo(ColumnInfo columnInfo, int rowNumber) {
if (columnInfo.getDataType() == "java.util.UUID") {
return "'${UUID.randomUUID().toString()}'"
}
if (columnInfo.getDataType() == "java.lang.Long" || columnInfo.getDataType() == "java.lang.Integer") {
return rowNumber.toString()
}
if (columnInfo.getDataType() == "java.sql.Timestamp") {
return "'${new Timestamp(System.currentTimeMillis()).toString()}'"
}
if (columnInfo.getDataType() == "java.lang.String") {
return "'${RandomStringUtils.random(10, true, true)}'"
}
// TODO: Add other data types here.
return ""
}
After generating our insert statements, we will have to actually insert them in the database:
private void insertQueriesToTable(Connection sql, List<String> queries) {
println(" \nExecuting queries:")
queries.each {String query ->
println("* ${query}")
Statement statement = sql.createStatement()
statement.executeUpdate(query)
}
println("\n Done inserting ${queries.size()} queries!")
}
And there it is! The damn thing is finally finished! It's quite dirty and probably stupid but who the hell wants to think about the nitty gritty of inserting dummy data into a table if a script can do it for you? Right? Anyway, here's the complete code for all of you who stuck around, and here's a download link for the lazy. :)
// Java version 11
// Groovy version 2.4.12 or higher
@GrabConfig(systemClassLoader = true)
@Grab("org.codehaus.groovy:groovy-cli-picocli:2.5.15")
@Grab("org.postgresql:postgresql:42.2.8")
@Grab("org.apache.commons:commons-lang3:3.12.0")
import groovy.cli.picocli.CliBuilder
import groovy.cli.picocli.OptionAccessor
import java.awt.peer.CanvasPeer
import java.sql.*
import org.apache.commons.lang3.RandomStringUtils
import java.sql.ResultSet
import java.sql.ResultSetMetaData
CliBuilder cli = buildCli()
OptionAccessor options = cli.parse(args)
if (!options) {
System.exit(0)
}
if (options.help) {
cli.usage()
System.exit(0)
}
println(" -----------------------------------------------------------------------------------------")
println(" Populating table ${options.table}, in database ${options.dbName} with ${options.rows} rows...")
Connection sql = initDatabase(options.dbName, options.dbUser, options.dbPassword)
List<ColumnInfo> columns = getAllColumnNamesOfTable(sql, options.table)
listColumnsInfo(columns)
List<String> queries = generateQueries(options.table, options.rows, columns)
insertQueriesToTable(sql, queries)
sql.close()
private void insertQueriesToTable(Connection sql, List<String> queries) {
println(" \nExecuting queries:")
queries.each {String query ->
println("* ${query}")
Statement statement = sql.createStatement()
statement.executeUpdate(query)
}
println("\n Done inserting ${queries.size()} queries!")
}
private List<String> generateQueries(String table, int numOfRowsToPopulate, List<ColumnInfo> columnInfoList) {
List<String> queries = []
(0..numOfRowsToPopulate).each { int rowNumber ->
List<String> values = []
columnInfoList.each { ColumnInfo columnInfo ->
values.add(generateRandomValueForColumnInfo(columnInfo, rowNumber))
}
String query = "INSERT INTO ${table} values (${values.join(",")});"
queries.add(query)
}
return queries
}
private String generateRandomValueForColumnInfo(ColumnInfo columnInfo, int rowNumber) {
if (columnInfo.getDataType() == "java.util.UUID") {
return "'${UUID.randomUUID().toString()}'"
}
if (columnInfo.getDataType() == "java.lang.Long" || columnInfo.getDataType() == "java.lang.Integer") {
return rowNumber.toString()
}
if (columnInfo.getDataType() == "java.sql.Timestamp") {
return "'${new Timestamp(System.currentTimeMillis()).toString()}'"
}
if (columnInfo.getDataType() == "java.lang.String") {
return "'${RandomStringUtils.random(10, true, true)}'"
}
// TODO: Add other data types here.
return ""
}
private void listColumnsInfo(List<ColumnInfo> columnInfoList) {
println("\n Found ${columnInfoList.size()} columns with the following types: \n")
columnInfoList.each {
println(" name: ${it.getName()} | dataType: ${it.getDataType()}")
}
}
private static List<ColumnInfo> getAllColumnNamesOfTable(Connection sqlInstance, String tableName) {
String query = "SELECT * FROM ${tableName} limit 1;"
Statement statement = sqlInstance.createStatement()
ResultSet resultSet = statement.executeQuery(query)
ResultSetMetaData resultSetMetaData = resultSet.getMetaData()
int columnCount = resultSetMetaData.getColumnCount()
List<ColumnInfo> columnNames = new ArrayList<>()
(1..columnCount).each {
String name = resultSetMetaData.getColumnName(it)
String type = resultSetMetaData.getColumnClassName(it)
columnNames.add(new ColumnInfo(name, type))
}
return columnNames
}
private static Connection initDatabase(String dbName, String dbUser, String dbPassword) {
String driverClassName = "org.postgresql.Driver"
String dbUrl = "jdbc:postgresql://localhost:5432/${dbName}"
Class.forName(driverClassName)
return DriverManager.getConnection(dbUrl, dbUser, dbPassword)
}
private static CliBuilder buildCli() {
String usage = """
Usage:
This script can populate any table in postgres with random values.
Sample usage:
groovy populate-table -dbName=peopledb -dbUser=postgres -dbPassword=password -table=person -rows=10
"""
CliBuilder cliBuilder = new CliBuilder(
name: "populateTable",
width: 1000,
usage: usage
)
cliBuilder.dbName(type: String, argName: "dbName", required: true, "The name of the database.")
cliBuilder.dbUser(type: String, argName: "dbUser", required: true, "The username of the user that can access the database.")
cliBuilder.dbPassword(type: String, argName: "dbPassword", required: true, "The password of the user.")
cliBuilder.table(type: String, argName: "table", required: true, "The name of the table.")
cliBuilder.rows(type: Integer, argName: "rows", required: true, "The number of rows to populate.")
cliBuilder.help(longOpt: "h", "Prints usage.")
return cliBuilder
}
public class ColumnInfo {
private String name
private String dataType
public ColumnInfo(String name, String dataType) {
this.name = name
this.dataType = dataType
}
public String getName() {
return name
}
public String getDataType() {
return dataType
}
}
Comments
Post a Comment