ingest script

master
Brian Sakal 2 years ago
parent c428dfec43
commit d519084df9

2
.gitignore vendored

@ -1,4 +1,6 @@
*.lmdb
*.lmdb-lock
*.duckdb
*.duckdb.wal
*.parquet
node_modules

111
dist/ingest.js vendored

@ -1,14 +1,105 @@
import fs from "fs/promises";
import duckdb from "duckdb";
const db = new duckdb.Database("./options.duckdb");
const filename = "/home/brian/Downloads/options-data/2013-01-02options.cvs";
db.run(`CREATE TABLE prices AS SELECT * FROM read_csv_auto('${filename}')`, (err) => {
if (err) {
throw err;
const db = new duckdb.Database("quotes.duckdb");
// Directory that is scanned for the daily "<date>options" / "<date>stocks" CSV exports.
// NOTE(review): absolute path into one developer's home directory — presumably
// meant to become configurable; confirm before running elsewhere.
const pathToCsvs = "/home/brian/Downloads/options-data";
// SQL statements in execution order. The schema setup below comes first; the
// per-file INSERT statements are appended to this array later, and the whole
// list is then joined and handed to db.exec() as a single batch.
const statements = [
// NOTE(review): unlike the tables below, the enum types carry no IF NOT EXISTS
// guard — presumably a second run against an existing database fails on these
// two statements; confirm.
"CREATE TYPE OPTION_TYPE as ENUM ('put', 'call');",
"CREATE TYPE OPTION_STYLE as ENUM ('A', 'E');",
// option_quote: filled from the options CSVs. `contract` is a generated column
// that concatenates underlying + two-digit year + zero-padded month and day +
// 'C'/'P' + strike*1000 left-padded to 8 digits (looks like an OCC-style option
// symbol without ticker padding — confirm).
`CREATE TABLE IF NOT EXISTS option_quote (
contract VARCHAR GENERATED ALWAYS AS (
CONCAT(
underlying ,
RIGHT(YEAR(expiration)::VARCHAR,2) ,
LPAD(MONTH(expiration)::VARCHAR,2,'0') ,
LPAD(DAY(expiration)::VARCHAR,2,'0') ,
(CASE WHEN type = 'call' THEN 'C' ELSE 'P' END) ,
LPAD(((strike*1000)::INTEGER)::VARCHAR,8,'0')
)
) VIRTUAL,
underlying VARCHAR,
expiration DATE,
type OPTION_TYPE,
strike FLOAT,
style OPTION_STYLE,
bid FLOAT,
bid_size INTEGER DEFAULT 0,
ask FLOAT,
ask_size INTEGER DEFAULT 0,
volume INTEGER,
open_interest INTEGER,
quote_date DATE,
delta FLOAT,
gamma FLOAT,
theta FLOAT,
vega FLOAT,
implied_volatility FLOAT
);`,
// stock_quote: filled from the stocks CSVs; every numeric column defaults to 0.0.
`CREATE TABLE IF NOT EXISTS stock_quote (
quote_date DATE,
symbol VARCHAR,
open FLOAT DEFAULT 0.0,
high FLOAT DEFAULT 0.0,
low FLOAT DEFAULT 0.0,
close FLOAT DEFAULT 0.0,
volume FLOAT DEFAULT 0.0,
adjust_close FLOAT DEFAULT 0.0
);`
];
try {
const files = await fs.readdir(pathToCsvs);
for (const filename of files) {
const fileExtension = filename.substring(filename.length - 11);
if (fileExtension === "options.csv" || fileExtension === "options.cvs") {
const quoteDate = filename.substring(0, 10);
statements.push(`INSERT INTO option_quote (
SELECT
underlying,
expiration,
type,
strike,
style,
bid,
bid_size,
ask,
ask_size,
volume,
open_interest,
quote_date,
delta,
gamma,
theta,
vega,
implied_volatility
FROM read_csv_auto('${pathToCsvs}/${filename}')
);`);
statements.push(`INSERT INTO stock_quote (
SELECT
'${quoteDate}',
symbol,
open,
high,
low,
close,
volume,
adjust_close
FROM read_csv_auto('${pathToCsvs}/${quoteDate}stocks.cvs')
);`);
}
}
db.all("SELECT count(*) AS count FROM prices WHERE underlying = 'TSLA'", function(err2, res) {
if (err2) {
throw err2;
console.log(statements);
db.exec(statements.join(" "), (err) => {
if (err) {
console.error(err);
return;
}
console.log(res[0].count);
db.all("SELECT contract FROM option_quote WHERE underlying = 'TSLA' LIMIT 10", (err2, res) => {
if (err2) {
console.error(err2);
return;
}
console.log(res[0]);
});
});
});
} catch (err) {
console.error(err);
}

@ -1,11 +1,118 @@
import fs from 'fs/promises';
import duckdb from 'duckdb';
const db = new duckdb.Database('./options.duckdb'); // or a file name for a persistent DB
const db = new duckdb.Database('quotes.duckdb'); // or a file name for a persistent DB
const filename = '/home/brian/Downloads/options-data/2013-01-02options.cvs';
db.run(`CREATE TABLE prices AS SELECT * FROM read_csv_auto('${filename}')`, (err)=>{
// Directory that is scanned for the daily "<date>options" / "<date>stocks" CSV exports.
// NOTE(review): absolute path into one developer's home directory — presumably
// meant to become configurable; confirm before running elsewhere.
const pathToCsvs = "/home/brian/Downloads/options-data";
// SQL statements in execution order. The schema setup below comes first; the
// per-file INSERT statements are appended to this array later, and the whole
// list is then joined and handed to db.exec() as a single batch.
const statements = [
// NOTE(review): unlike the tables below, the enum types carry no IF NOT EXISTS
// guard — presumably a second run against an existing database fails on these
// two statements; confirm.
"CREATE TYPE OPTION_TYPE as ENUM ('put', 'call');",
"CREATE TYPE OPTION_STYLE as ENUM ('A', 'E');",
// option_quote: filled from the options CSVs. `contract` is a generated column
// that concatenates underlying + two-digit year + zero-padded month and day +
// 'C'/'P' + strike*1000 left-padded to 8 digits (looks like an OCC-style option
// symbol without ticker padding — confirm).
`CREATE TABLE IF NOT EXISTS option_quote (
contract VARCHAR GENERATED ALWAYS AS (
CONCAT(
underlying ,
RIGHT(YEAR(expiration)::VARCHAR,2) ,
LPAD(MONTH(expiration)::VARCHAR,2,'0') ,
LPAD(DAY(expiration)::VARCHAR,2,'0') ,
(CASE WHEN type = 'call' THEN 'C' ELSE 'P' END) ,
LPAD(((strike*1000)::INTEGER)::VARCHAR,8,'0')
)
) VIRTUAL,
underlying VARCHAR,
expiration DATE,
type OPTION_TYPE,
strike FLOAT,
style OPTION_STYLE,
bid FLOAT,
bid_size INTEGER DEFAULT 0,
ask FLOAT,
ask_size INTEGER DEFAULT 0,
volume INTEGER,
open_interest INTEGER,
quote_date DATE,
delta FLOAT,
gamma FLOAT,
theta FLOAT,
vega FLOAT,
implied_volatility FLOAT
);`,
// stock_quote: filled from the stocks CSVs; every numeric column defaults to 0.0.
`CREATE TABLE IF NOT EXISTS stock_quote (
quote_date DATE,
symbol VARCHAR,
open FLOAT DEFAULT 0.0,
high FLOAT DEFAULT 0.0,
low FLOAT DEFAULT 0.0,
close FLOAT DEFAULT 0.0,
volume FLOAT DEFAULT 0.0,
adjust_close FLOAT DEFAULT 0.0
);`
];
/*
 * Ingest driver.
 *
 * Scans `pathToCsvs` for files whose name ends in "options.csv" or
 * "options.cvs", takes the first 10 characters of the name as the quote date,
 * and queues one INSERT for the options file plus one INSERT for the matching
 * "<date>stocks" file. The queued statements are appended to `statements`,
 * executed as a single batch via db.exec(), and a sample TSLA contract is
 * logged afterwards as a smoke test.
 *
 * Failures from readdir are caught and logged; failures from DuckDB are
 * reported through the callbacks and logged there.
 */
try {
  const optionsSuffixes = ['options.csv', 'options.cvs'];
  // ".cvs" is tried first because that is the spelling the script historically assumed.
  const stocksSuffixes = ['stocks.cvs', 'stocks.csv'];
  const isoDate = /^\d{4}-\d{2}-\d{2}$/;

  const files = await fs.readdir(pathToCsvs);
  const available = new Set(files);

  for (const filename of files) {
    if (!optionsSuffixes.some((suffix) => filename.endsWith(suffix))) {
      continue;
    }

    // The date is interpolated into SQL and cast to DATE, so reject anything
    // that is not a plain YYYY-MM-DD prefix instead of failing the whole batch.
    const quoteDate = filename.substring(0, 10);
    if (!isoDate.test(quoteDate)) {
      console.warn(`Skipping ${filename}: name does not start with a YYYY-MM-DD date`);
      continue;
    }

    // Double embedded single quotes so the path stays one SQL string literal.
    const optionsPath = `${pathToCsvs}/${filename}`.replace(/'/g, "''");
    statements.push(`INSERT INTO option_quote (
      SELECT
        underlying,
        expiration,
        type,
        strike,
        style,
        bid,
        bid_size,
        ask,
        ask_size,
        volume,
        open_interest,
        quote_date,
        delta,
        gamma,
        theta,
        vega,
        implied_volatility
      FROM read_csv_auto('${optionsPath}')
    );`);

    // Only queue the stocks INSERT when a matching file is really present;
    // a missing file would otherwise abort every statement queued after it.
    const stocksFile = stocksSuffixes
      .map((suffix) => `${quoteDate}${suffix}`)
      .find((candidate) => available.has(candidate));
    if (stocksFile === undefined) {
      console.warn(`No stocks file found for ${quoteDate}; skipping stock quotes for that date`);
      continue;
    }

    const stocksPath = `${pathToCsvs}/${stocksFile}`.replace(/'/g, "''");
    statements.push(`INSERT INTO stock_quote (
      SELECT
        '${quoteDate}',
        symbol,
        open,
        high,
        low,
        close,
        volume,
        adjust_close
      FROM read_csv_auto('${stocksPath}')
    );`);
  }

  console.log(statements);
  db.exec(statements.join(' '), (err) => {
    if (err) {
      console.error(err);
      return;
    }
    // Smoke test: show one generated contract symbol for TSLA.
    db.all("SELECT contract FROM option_quote WHERE underlying = 'TSLA' LIMIT 10", (queryErr, res) => {
      if (queryErr) {
        console.error(queryErr);
        return;
      }
      console.log(res[0]);
    });
  });
} catch (err) {
  console.error(err);
}
/*
db.run(`CREATE TABLE option_quote AS SELECT * FROM read_csv_auto('${filename}')`, (err)=>{
if (err) {
throw err;
}
db.run(``);
db.all("SELECT count(*) AS count FROM prices WHERE underlying = 'TSLA'", function(err, res) {
if (err) {
throw err;
@ -14,3 +121,4 @@ db.run(`CREATE TABLE prices AS SELECT * FROM read_csv_auto('${filename}')`, (err
});
});
*/

2215
package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -1,10 +1,14 @@
{
"name": "calendar-optimizer-deno",
"type": "module",
"scripts": {
"build": "yarn esbuild ingest.ts --outdir=dist --platform=node --format=esm"
},
"dependencies": {
"duckdb": "^0.7.1"
},
"devDependencies": {
"@types/node": "^18.16.3",
"esbuild": "^0.17.18"
}
}

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save