From 113b6e66e5f7a3b7d25cc0c4c51b17562ca78f0d Mon Sep 17 00:00:00 2001 From: Avraham Sakal Date: Sun, 18 Feb 2024 20:40:09 -0500 Subject: [PATCH] ingest symbols and symbol_sync_statuses --- server/package.json | 6 +- server/pnpm-lock.yaml | 23 +++++++ ...ption-quotes-from-polygon-to-clickhouse.ts | 62 +++++++++++++++++++ server/src/scripts/polygon.ts | 4 ++ server/tables.sql | 9 +++ 5 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 server/src/scripts/ingest-option-quotes-from-polygon-to-clickhouse.ts create mode 100644 server/src/scripts/polygon.ts diff --git a/server/package.json b/server/package.json index 0ffcf46..5a12faf 100644 --- a/server/package.json +++ b/server/package.json @@ -2,7 +2,7 @@ "private": true, "type": "module", "scripts": { - "build": "esbuild src/*.ts --platform=node --outdir=dist --format=esm", + "build": "esbuild src/*.ts src/**/*.ts --platform=node --outdir=dist --format=esm", "build-scripts": "esbuild scripts/*.ts --platform=node --outdir=dist/scripts --format=esm", "dev:node": "node --watch dist/index.js", "dev:esbuild": "pnpm run build --watch", @@ -13,7 +13,9 @@ "@sinclair/typebox": "^0.32.5", "@trpc/server": "^10.45.0", "cors": "^2.8.5", - "dotenv": "^16.4.1" + "dotenv": "^16.4.1", + "p-all": "^5.0.0", + "p-throttle": "^6.1.0" }, "devDependencies": { "@types/cors": "^2.8.17", diff --git a/server/pnpm-lock.yaml b/server/pnpm-lock.yaml index de42440..2ab5077 100644 --- a/server/pnpm-lock.yaml +++ b/server/pnpm-lock.yaml @@ -20,6 +20,12 @@ dependencies: dotenv: specifier: ^16.4.1 version: 16.4.1 + p-all: + specifier: ^5.0.0 + version: 5.0.0 + p-throttle: + specifier: ^6.1.0 + version: 6.1.0 devDependencies: '@types/cors': @@ -792,6 +798,23 @@ packages: object-keys: 1.1.1 dev: true + /p-all@5.0.0: + resolution: {integrity: sha512-pofqu/1FhCVa+78xNAptCGc9V45exFz2pvBRyIvgXkNM0Rh18Py7j8pQuSjA+zpabI46v9hRjNWmL9EAFcEbpw==} + engines: {node: '>=16'} + dependencies: + p-map: 6.0.0 + dev: false + + /p-map@6.0.0: + resolution: {integrity: sha512-T8BatKGY+k5rU+Q/GTYgrEf2r4xRMevAN5mtXc2aPc4rS1j3s+vWTaO2Wag94neXuCAUAs8cxBL9EeB5EA6diw==} + engines: {node: '>=16'} + dev: false + + /p-throttle@6.1.0: + resolution: {integrity: sha512-eQMdGTxk2+047La67wefUtt0tEHh7D+C8Jl7QXoFCuIiNYeQ9zWs2AZiJdIAs72rSXZ06t11me2bgalRNdy3SQ==} + engines: {node: '>=18'} + dev: false + /parse-json@4.0.0: resolution: {integrity: sha512-aOIos8bujGN93/8Ox/jPLh7RwVnPEysynVFE+fQZyg6jKELEHwzgKdLRFHUgXJL6kylijVSBC4BvN9OmsB48Rw==} engines: {node: '>=4'} diff --git a/server/src/scripts/ingest-option-quotes-from-polygon-to-clickhouse.ts b/server/src/scripts/ingest-option-quotes-from-polygon-to-clickhouse.ts new file mode 100644 index 0000000..4d57b13 --- /dev/null +++ b/server/src/scripts/ingest-option-quotes-from-polygon-to-clickhouse.ts @@ -0,0 +1,62 @@ +import { clickhouse, query } from "../clickhouse.js"; +import { getApiKey } from "./polygon.js"; +import pAll from 'p-all'; + +type PolygonResponse = {next_url?:string, results:Array<{ticker:string}>}; +async function getOptionContracts(underlyingSymbol, asOfDate){ + let latestBatch = await (await fetch(`https://api.polygon.io/v3/reference/options/contracts?underlying_ticker=${underlyingSymbol}&as_of=${asOfDate}&sort=ticker&limit=1000&apiKey=${await getApiKey()}`)).json() as PolygonResponse; + console.log(latestBatch.results.map((r)=>r.ticker)); + while(latestBatch.hasOwnProperty('next_url')){ + latestBatch = await (await fetch(`${latestBatch.next_url}&apiKey=${await getApiKey()}`)).json() as PolygonResponse; + console.log(latestBatch.results.map((r)=>r.ticker)); + } +} + +//await getOptionContracts('AAPL','2024-01-30'); + +/** + * For each symbol in `symbols` table, check the latest `asOfDate` + * in `symbol_sync_statuses` for that symbol. Then fill-in the rest + * of the dates until today's date. + */ +async function fillSyncStatuses(){ + const symbols = (await query(` + SELECT symbol from symbols + `)).map(({symbol})=>symbol); + + console.log('symbols', symbols); + await pAll(symbols.map( + (symbol)=> + ()=>query<{latestAsOfDate:string}>(` + SELECT + latestAsOfDate + FROM( + SELECT last_value(asOfDate) as latestAsOfDate + FROM ( + SELECT * + FROM symbol_sync_statuses + WHERE symbol = '${symbol}' + ORDER BY asOfDate ASC + ) + ) + WHERE latestAsOfDate > '2022-02-18' + `).then((rows)=> + clickhouse.command({ + query: ` + INSERT INTO symbol_sync_statuses + SELECT + '${symbol}' as symbol, + Date(dateAdd(DAY,number,'${rows[0]?.latestAsOfDate || '2022-02-19'}')) as asOfDate, + 'not-started' as status + FROM system.numbers + WHERE number < dateDiff('days',Date('${rows[0]?.latestAsOfDate || '2022-02-19'}'), Date(now())) + AND number > 0 + ` + }).then(()=>{console.log(`Done ${symbol}`);}) + ) + ), + {concurrency: 6} + ); +} + +await fillSyncStatuses(); \ No newline at end of file diff --git a/server/src/scripts/polygon.ts b/server/src/scripts/polygon.ts new file mode 100644 index 0000000..6402f35 --- /dev/null +++ b/server/src/scripts/polygon.ts @@ -0,0 +1,4 @@ +import pThrottle from 'p-throttle'; + +const apiKey = 'H95NTsatM1iTWLUwDLxM2J5zhUVYdCEz'; +export const getApiKey = pThrottle({limit: 5, interval: 60000})(()=>apiKey); \ No newline at end of file diff --git a/server/tables.sql b/server/tables.sql index 0f2f000..ff9b047 100644 --- a/server/tables.sql +++ b/server/tables.sql @@ -12,6 +12,15 @@ CREATE TABLE symbols ENGINE MergeTree() ORDER BY (symbol); +CREATE TABLE symbol_sync_statuses +( + symbol String, + asOfDate Date, + status ENUM('not-started','pending','done') +) +ENGINE MergeTree() +ORDER BY (asOfDate, symbol); + CREATE TABLE stock_aggregates ( symbol LowCardinality(String),